[XFS] Fix to prevent the notorious 'NULL files' problem after a crash.

Linux Kernel Mailing List Tue, 08 May 2007 13:13:18 -0700

Gitweb:     
http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=ba87ea699ebd9dd577bf055ebc4a98200e337542
Commit:     ba87ea699ebd9dd577bf055ebc4a98200e337542
Parent:     2a32963130aec5e157b58ff7dfa3dfa1afdf7ca1
Author:     Lachlan McIlroy <[EMAIL PROTECTED]>
AuthorDate: Tue May 8 13:49:46 2007 +1000
Committer:  Tim Shimmin <[EMAIL PROTECTED]>
CommitDate: Tue May 8 13:49:46 2007 +1000


    [XFS] Fix to prevent the notorious 'NULL files' problem after a crash.
    
    The problem that has been addressed is that of synchronising updates of
    the file size with writes that extend a file. Without the fix the update
    of a file's size, as a result of a write beyond eof, is independent of
    when the cached data is flushed to disk. Often the file size update would
    be written to the filesystem log before the data is flushed to disk. When
    a system crashes between these two events and the filesystem log is
    replayed on mount the file's size will be set but since the contents never
    made it to disk the file is full of holes. If some of the cached data was
    flushed to disk then it may just be a section of the file at the end that
    has holes.
    
    There are existing fixes to help alleviate this problem, particularly in
    the case where a file has been truncated, that force cached data to be
    flushed to disk when the file is closed. If the system crashes while the
    file(s) are still open then this flushing will never occur.
    
    The fix that we have implemented is to introduce a second file size,
    called the in-memory file size, that represents the current file size as
    viewed by the user. The existing file size, called the on-disk file size,
    is the one that gets written to the filesystem log and we only update it
    when it is safe to do so. When we write to a file beyond eof we only
    update the in-memory file size in the write operation. Later, when the I/O
    operation that flushes the cached data to disk completes, an I/O
    completion routine will update the on-disk file size. The on-disk file
    size will be updated to the maximum offset of the I/O or to the value of
    the in-memory file size if the I/O includes eof.
    
    SGI-PV: 958522
    SGI-Modid: xfs-linux-melb:xfs-kern:28322a
    
    Signed-off-by: Lachlan McIlroy <[EMAIL PROTECTED]>
    Signed-off-by: David Chinner <[EMAIL PROTECTED]>
    Signed-off-by: Tim Shimmin <[EMAIL PROTECTED]>
---
 fs/xfs/linux-2.6/xfs_aops.c |   89 ++++++++++++++++++++++++++++++++++++------
 fs/xfs/linux-2.6/xfs_lrw.c  |   91 ++++++++++++++++++++++++++-----------------
 fs/xfs/xfs_bmap.c           |   14 ++++--
 fs/xfs/xfs_inode.c          |   48 ++++++++++++++++++-----
 fs/xfs/xfs_inode.h          |    3 +
 fs/xfs/xfs_iocore.c         |    2 +-
 fs/xfs/xfs_iomap.c          |    8 ++--
 fs/xfs/xfs_iomap.h          |    1 +
 fs/xfs/xfs_vnodeops.c       |   40 +++++++++---------
 9 files changed, 208 insertions(+), 88 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 143ffc8..4475588 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -141,9 +141,46 @@ xfs_destroy_ioend(
 }
 
 /*
+ * Update on-disk file size now that data has been written to disk.
+ * The current in-memory file size is i_size.  If a write is beyond
+ * eof io_new_size will be the intended file size until i_size is
+ * updated.  If this write does not extend all the way to the valid
+ * file size then restrict this update to the end of the write.
+ */
+STATIC void
+xfs_setfilesize(
+       xfs_ioend_t             *ioend)
+{
+       xfs_inode_t             *ip;
+       xfs_fsize_t             isize;
+       xfs_fsize_t             bsize;
+
+       ip = xfs_vtoi(ioend->io_vnode);
+
+       ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
+       ASSERT(ioend->io_type != IOMAP_READ);
+
+       if (unlikely(ioend->io_error))
+               return;
+
+       bsize = ioend->io_offset + ioend->io_size;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+       isize = MAX(ip->i_size, ip->i_iocore.io_new_size);
+       isize = MIN(isize, bsize);
+
+       if (ip->i_d.di_size < isize) {
+               ip->i_d.di_size = isize;
+               ip->i_update_core = 1;
+               ip->i_update_size = 1;
+       }
+
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+}
+
+/*
  * Buffered IO write completion for delayed allocate extents.
- * TODO: Update ondisk isize now that we know the file data
- * has been flushed (i.e. the notorious "NULL file" problem).
  */
 STATIC void
 xfs_end_bio_delalloc(
@@ -152,6 +189,7 @@ xfs_end_bio_delalloc(
        xfs_ioend_t             *ioend =
                container_of(work, xfs_ioend_t, io_work);
 
+       xfs_setfilesize(ioend);
        xfs_destroy_ioend(ioend);
 }
 
@@ -165,6 +203,7 @@ xfs_end_bio_written(
        xfs_ioend_t             *ioend =
                container_of(work, xfs_ioend_t, io_work);
 
+       xfs_setfilesize(ioend);
        xfs_destroy_ioend(ioend);
 }
 
@@ -184,8 +223,23 @@ xfs_end_bio_unwritten(
        xfs_off_t               offset = ioend->io_offset;
        size_t                  size = ioend->io_size;
 
-       if (likely(!ioend->io_error))
+       if (likely(!ioend->io_error)) {
                bhv_vop_bmap(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL);
+               xfs_setfilesize(ioend);
+       }
+       xfs_destroy_ioend(ioend);
+}
+
+/*
+ * IO read completion for regular, written extents.
+ */
+STATIC void
+xfs_end_bio_read(
+       struct work_struct      *work)
+{
+       xfs_ioend_t             *ioend =
+               container_of(work, xfs_ioend_t, io_work);
+
        xfs_destroy_ioend(ioend);
 }
 
@@ -224,6 +278,8 @@ xfs_alloc_ioend(
                INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten);
        else if (type == IOMAP_DELAY)
                INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc);
+       else if (type == IOMAP_READ)
+               INIT_WORK(&ioend->io_work, xfs_end_bio_read);
        else
                INIT_WORK(&ioend->io_work, xfs_end_bio_written);
 
@@ -913,7 +969,7 @@ xfs_page_state_convert(
        bh = head = page_buffers(page);
        offset = page_offset(page);
        flags = -1;
-       type = 0;
+       type = IOMAP_READ;
 
        /* TODO: cleanup count and page_dirty */
 
@@ -999,7 +1055,7 @@ xfs_page_state_convert(
                         * That means it must already have extents allocated
                         * underneath it. Map the extent by reading it.
                         */
-                       if (!iomap_valid || type != 0) {
+                       if (!iomap_valid || type != IOMAP_READ) {
                                flags = BMAPI_READ;
                                size = xfs_probe_cluster(inode, page, bh,
                                                                head, 1);
@@ -1010,7 +1066,7 @@ xfs_page_state_convert(
                                iomap_valid = xfs_iomap_valid(&iomap, offset);
                        }
 
-                       type = 0;
+                       type = IOMAP_READ;
                        if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
                                ASSERT(buffer_mapped(bh));
                                if (iomap_valid)
@@ -1356,12 +1412,21 @@ xfs_end_io_direct(
         * completion handler in the future, in which case all this can
         * go away.
         */
-       if (private && size > 0) {
-               ioend->io_offset = offset;
-               ioend->io_size = size;
+       ioend->io_offset = offset;
+       ioend->io_size = size;
+       if (ioend->io_type == IOMAP_READ) {
+               xfs_finish_ioend(ioend);
+       } else if (private && size > 0) {
                xfs_finish_ioend(ioend);
        } else {
-               xfs_destroy_ioend(ioend);
+               /*
+                * A direct I/O write ioend starts its life in unwritten
+                * state in case it maps an unwritten extent.  This write
+                * didn't map an unwritten extent so switch its completion
+                * handler.
+                */
+               INIT_WORK(&ioend->io_work, xfs_end_bio_written);
+               xfs_finish_ioend(ioend);
        }
 
        /*
@@ -1392,15 +1457,15 @@ xfs_vm_direct_IO(
        if (error)
                return -error;
 
-       iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
-
        if (rw == WRITE) {
+               iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
                ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
                        iomap.iomap_target->bt_bdev,
                        iov, offset, nr_segs,
                        xfs_get_blocks_direct,
                        xfs_end_io_direct);
        } else {
+               iocb->private = xfs_alloc_ioend(inode, IOMAP_READ);
                ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
                        iomap.iomap_target->bt_bdev,
                        iov, offset, nr_segs,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 80fe312..82ab792 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -224,7 +224,7 @@ xfs_read(
                                mp->m_rtdev_targp : mp->m_ddev_targp;
                if ((*offset & target->bt_smask) ||
                    (size & target->bt_smask)) {
-                       if (*offset == ip->i_d.di_size) {
+                       if (*offset == ip->i_size) {
                                return (0);
                        }
                        return -XFS_ERROR(EINVAL);
@@ -387,9 +387,10 @@ xfs_splice_write(
 {
        xfs_inode_t             *ip = XFS_BHVTOI(bdp);
        xfs_mount_t             *mp = ip->i_mount;
+       xfs_iocore_t            *io = &ip->i_iocore;
        ssize_t                 ret;
        struct inode            *inode = outfilp->f_mapping->host;
-       xfs_fsize_t             isize;
+       xfs_fsize_t             isize, new_size;
 
        XFS_STATS_INC(xs_write_calls);
        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -410,6 +411,14 @@ xfs_splice_write(
                        return -error;
                }
        }
+
+       new_size = *ppos + count;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       if (new_size > ip->i_size)
+               io->io_new_size = new_size;
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
        xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore,
                           pipe, count, *ppos, ioflags);
        ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
@@ -420,14 +429,18 @@ xfs_splice_write(
        if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
                *ppos = isize;
 
-       if (*ppos > ip->i_d.di_size) {
+       if (*ppos > ip->i_size) {
                xfs_ilock(ip, XFS_ILOCK_EXCL);
-               if (*ppos > ip->i_d.di_size) {
-                       ip->i_d.di_size = *ppos;
-                       i_size_write(inode, *ppos);
-                       ip->i_update_core = 1;
-                       ip->i_update_size = 1;
-               }
+               if (*ppos > ip->i_size)
+                       ip->i_size = *ppos;
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       }
+
+       if (io->io_new_size) {
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+               io->io_new_size = 0;
+               if (ip->i_d.di_size > ip->i_size)
+                       ip->i_d.di_size = ip->i_size;
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
        }
        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -711,8 +724,6 @@ start:
                goto out_unlock_mutex;
        }
 
-       isize = i_size_read(inode);
-
        if (ioflags & IO_ISDIRECT) {
                xfs_buftarg_t   *target =
                        (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
@@ -723,7 +734,7 @@ start:
                        return XFS_ERROR(-EINVAL);
                }
 
-               if (!need_i_mutex && (VN_CACHED(vp) || pos > isize)) {
+               if (!need_i_mutex && (VN_CACHED(vp) || pos > xip->i_size)) {
                        xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
                        iolock = XFS_IOLOCK_EXCL;
                        locktype = VRWLOCK_WRITE;
@@ -735,7 +746,7 @@ start:
        }
 
        new_size = pos + count;
-       if (new_size > isize)
+       if (new_size > xip->i_size)
                io->io_new_size = new_size;
 
        if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
@@ -751,8 +762,7 @@ start:
                                      pos, count,
                                      dmflags, &locktype);
                if (error) {
-                       xfs_iunlock(xip, iolock);
-                       goto out_unlock_mutex;
+                       goto out_unlock_internal;
                }
                xfs_ilock(xip, XFS_ILOCK_EXCL);
                eventsent = 1;
@@ -764,9 +774,8 @@ start:
                 * event prevents another call to XFS_SEND_DATA, which is
                 * what allows the size to change in the first place.
                 */
-               if ((file->f_flags & O_APPEND) && savedsize != isize) {
+               if ((file->f_flags & O_APPEND) && savedsize != xip->i_size)
                        goto start;
-               }
        }
 
        if (likely(!(ioflags & IO_INVIS))) {
@@ -784,11 +793,11 @@ start:
         * to zero it out up to the new size.
         */
 
-       if (pos > isize) {
-               error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, isize);
+       if (pos > xip->i_size) {
+               error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, xip->i_size);
                if (error) {
-                       xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
-                       goto out_unlock_mutex;
+                       xfs_iunlock(xip, XFS_ILOCK_EXCL);
+                       goto out_unlock_internal;
                }
        }
        xfs_iunlock(xip, XFS_ILOCK_EXCL);
@@ -808,8 +817,7 @@ start:
                if (likely(!error))
                        error = -remove_suid(file->f_path.dentry);
                if (unlikely(error)) {
-                       xfs_iunlock(xip, iolock);
-                       goto out_unlock_mutex;
+                       goto out_unlock_internal;
                }
        }
 
@@ -879,12 +887,12 @@ retry:
                error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
                                DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
                                0, 0, 0); /* Delay flag intentionally  unused */
-               if (error)
-                       goto out_nounlocks;
                if (need_i_mutex)
                        mutex_lock(&inode->i_mutex);
                xfs_rwlock(bdp, locktype);
-               pos = xip->i_d.di_size;
+               if (error)
+                       goto out_unlock_internal;
+               pos = xip->i_size;
                ret = 0;
                goto retry;
        }
@@ -893,14 +901,10 @@ retry:
        if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
                *offset = isize;
 
-       if (*offset > xip->i_d.di_size) {
+       if (*offset > xip->i_size) {
                xfs_ilock(xip, XFS_ILOCK_EXCL);
-               if (*offset > xip->i_d.di_size) {
-                       xip->i_d.di_size = *offset;
-                       i_size_write(inode, *offset);
-                       xip->i_update_core = 1;
-                       xip->i_update_size = 1;
-               }
+               if (*offset > xip->i_size)
+                       xip->i_size = *offset;
                xfs_iunlock(xip, XFS_ILOCK_EXCL);
        }
 
@@ -922,16 +926,31 @@ retry:
 
                error = sync_page_range(inode, mapping, pos, ret);
                if (!error)
-                       error = ret;
-               return error;
+                       error = -ret;
+               if (need_i_mutex)
+                       mutex_lock(&inode->i_mutex);
+               xfs_rwlock(bdp, locktype);
        }
 
  out_unlock_internal:
+       if (io->io_new_size) {
+               xfs_ilock(xip, XFS_ILOCK_EXCL);
+               io->io_new_size = 0;
+               /*
+                * If this was a direct or synchronous I/O that failed (such
+                * as ENOSPC) then part of the I/O may have been written to
+                * disk before the error occurred.  In this case the on-disk
+                * file size may have been adjusted beyond the in-memory file
+                * size and now needs to be truncated back.
+                */
+               if (xip->i_d.di_size > xip->i_size)
+                       xip->i_d.di_size = xip->i_size;
+               xfs_iunlock(xip, XFS_ILOCK_EXCL);
+       }
        xfs_rwunlock(bdp, locktype);
  out_unlock_mutex:
        if (need_i_mutex)
                mutex_unlock(&inode->i_mutex);
- out_nounlocks:
        return -error;
 }
 
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 50f2213..b1ea26e 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4444,8 +4444,11 @@ xfs_bmap_one_block(
        xfs_bmbt_irec_t s;              /* internal version of extent */
 
 #ifndef DEBUG
-       if (whichfork == XFS_DATA_FORK)
-               return ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize;
+       if (whichfork == XFS_DATA_FORK) {
+               return ((ip->i_d.di_mode & S_IFMT) == S_IFREG) ?
+                       (ip->i_size == ip->i_mount->m_sb.sb_blocksize) :
+                       (ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
+       }
 #endif /* !DEBUG */
        if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
                return 0;
@@ -4457,7 +4460,7 @@ xfs_bmap_one_block(
        xfs_bmbt_get_all(ep, &s);
        rval = s.br_startoff == 0 && s.br_blockcount == 1;
        if (rval && whichfork == XFS_DATA_FORK)
-               ASSERT(ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
+               ASSERT(ip->i_size == ip->i_mount->m_sb.sb_blocksize);
        return rval;
 }
 
@@ -5817,7 +5820,7 @@ xfs_getbmap(
                        fixlen = XFS_MAXIOFFSET(mp);
                } else {
                        prealloced = 0;
-                       fixlen = ip->i_d.di_size;
+                       fixlen = ip->i_size;
                }
        } else {
                prealloced = 0;
@@ -5841,7 +5844,8 @@ xfs_getbmap(
 
        xfs_ilock(ip, XFS_IOLOCK_SHARED);
 
-       if (whichfork == XFS_DATA_FORK && ip->i_delayed_blks) {
+       if (whichfork == XFS_DATA_FORK &&
+               (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) {
                /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */
                error = bhv_vop_flush_pages(vp, (xfs_off_t)0, -1, 0, FI_REMAPF);
        }
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7d1ab39..3ca5d43 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -442,6 +442,7 @@ xfs_iformat(
                        return XFS_ERROR(EFSCORRUPTED);
                }
                ip->i_d.di_size = 0;
+               ip->i_size = 0;
                ip->i_df.if_u2.if_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT);
                break;
 
@@ -980,6 +981,7 @@ xfs_iread(
        }
 
        ip->i_delayed_blks = 0;
+       ip->i_size = ip->i_d.di_size;
 
        /*
         * Mark the buffer containing the inode as something to keep
@@ -1170,6 +1172,7 @@ xfs_ialloc(
        }
 
        ip->i_d.di_size = 0;
+       ip->i_size = 0;
        ip->i_d.di_nextents = 0;
        ASSERT(ip->i_d.di_nblocks == 0);
        xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD);
@@ -1340,7 +1343,7 @@ xfs_file_last_byte(
        } else {
                last_block = 0;
        }
-       size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_d.di_size);
+       size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size);
        last_block = XFS_FILEOFF_MAX(last_block, size_last_block);
 
        last_byte = XFS_FSB_TO_B(mp, last_block);
@@ -1434,7 +1437,7 @@ xfs_itruncate_start(
        int             error = 0;
 
        ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0);
-       ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size));
+       ASSERT((new_size == 0) || (new_size <= ip->i_size));
        ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
               (flags == XFS_ITRUNC_MAYBE));
 
@@ -1558,7 +1561,7 @@ xfs_itruncate_finish(
 
        ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0);
        ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
-       ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size));
+       ASSERT((new_size == 0) || (new_size <= ip->i_size));
        ASSERT(*tp != NULL);
        ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
        ASSERT(ip->i_transp == *tp);
@@ -1632,8 +1635,20 @@ xfs_itruncate_finish(
         */
        if (fork == XFS_DATA_FORK) {
                if (ip->i_d.di_nextents > 0) {
-                       ip->i_d.di_size = new_size;
-                       xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
+                       /*
+                        * If we are not changing the file size then do
+                        * not update the on-disk file size - we may be
+                        * called from xfs_inactive_free_eofblocks().  If we
+                        * update the on-disk file size and then the system
+                        * crashes before the contents of the file are
+                        * flushed to disk then the files may be full of
+                        * holes (ie NULL files bug).
+                        */
+                       if (ip->i_size != new_size) {
+                               ip->i_d.di_size = new_size;
+                               ip->i_size = new_size;
+                               xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
+                       }
                }
        } else if (sync) {
                ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC));
@@ -1769,7 +1784,19 @@ xfs_itruncate_finish(
         */
        if (fork == XFS_DATA_FORK) {
                xfs_isize_check(mp, ip, new_size);
-               ip->i_d.di_size = new_size;
+               /*
+                * If we are not changing the file size then do
+                * not update the on-disk file size - we may be
+                * called from xfs_inactive_free_eofblocks().  If we
+                * update the on-disk file size and then the system
+                * crashes before the contents of the file are
+                * flushed to disk then the files may be full of
+                * holes (ie NULL files bug).
+                */
+               if (ip->i_size != new_size) {
+                       ip->i_d.di_size = new_size;
+                       ip->i_size = new_size;
+               }
        }
        xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
        ASSERT((new_size != 0) ||
@@ -1802,7 +1829,7 @@ xfs_igrow_start(
 
        ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
        ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
-       ASSERT(new_size > ip->i_d.di_size);
+       ASSERT(new_size > ip->i_size);
 
        /*
         * Zero any pages that may have been created by
@@ -1810,7 +1837,7 @@ xfs_igrow_start(
         * and any blocks between the old and new file sizes.
         */
        error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size,
-                            ip->i_d.di_size);
+                            ip->i_size);
        return error;
 }
 
@@ -1834,13 +1861,14 @@ xfs_igrow_finish(
        ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
        ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
        ASSERT(ip->i_transp == tp);
-       ASSERT(new_size > ip->i_d.di_size);
+       ASSERT(new_size > ip->i_size);
 
        /*
         * Update the file size.  Update the inode change timestamp
         * if change_flag set.
         */
        ip->i_d.di_size = new_size;
+       ip->i_size = new_size;
        if (change_flag)
                xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -2323,7 +2351,7 @@ xfs_ifree(
        ASSERT(ip->i_d.di_nlink == 0);
        ASSERT(ip->i_d.di_nextents == 0);
        ASSERT(ip->i_d.di_anextents == 0);
-       ASSERT((ip->i_d.di_size == 0) ||
+       ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) ||
               ((ip->i_d.di_mode & S_IFMT) != S_IFREG));
        ASSERT(ip->i_d.di_nblocks == 0);
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 699960f..cfe7b58 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -287,6 +287,7 @@ typedef struct xfs_inode {
        struct xfs_inode        *i_cnext;       /* cluster hash link forward */
        struct xfs_inode        *i_cprev;       /* cluster hash link backward */
 
+       xfs_fsize_t             i_size;         /* in-memory size */
        /* Trace buffers per inode. */
 #ifdef XFS_BMAP_TRACE
        struct ktrace           *i_xtrace;      /* inode extent list trace */
@@ -305,6 +306,8 @@ typedef struct xfs_inode {
 #endif
 } xfs_inode_t;
 
+#define XFS_ISIZE(ip)  ((((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \
+                               (ip)->i_size : (ip)->i_d.di_size)
 
 /*
  * i_flags helper functions
diff --git a/fs/xfs/xfs_iocore.c b/fs/xfs/xfs_iocore.c
index 06d710c..81548ec 100644
--- a/fs/xfs/xfs_iocore.c
+++ b/fs/xfs/xfs_iocore.c
@@ -52,7 +52,7 @@ STATIC xfs_fsize_t
 xfs_size_fn(
        xfs_inode_t             *ip)
 {
-       return (ip->i_d.di_size);
+       return XFS_ISIZE(ip);
 }
 
 STATIC int
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index cde70e8..3f2b9f2 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -458,7 +458,7 @@ xfs_iomap_write_direct(
                extsz = ip->i_d.di_extsize;
        }
 
-       isize = ip->i_d.di_size;
+       isize = ip->i_size;
        if (io->io_new_size > isize)
                isize = io->io_new_size;
 
@@ -524,7 +524,7 @@ xfs_iomap_write_direct(
        xfs_trans_ihold(tp, ip);
 
        bmapi_flag = XFS_BMAPI_WRITE;
-       if ((flags & BMAPI_DIRECT) && (offset < ip->i_d.di_size || extsz))
+       if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz))
                bmapi_flag |= XFS_BMAPI_PREALLOC;
 
        /*
@@ -676,7 +676,7 @@ xfs_iomap_write_delay(
        offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
 retry:
-       isize = ip->i_d.di_size;
+       isize = ip->i_size;
        if (io->io_new_size > isize)
                isize = io->io_new_size;
 
@@ -817,7 +817,7 @@ xfs_iomap_write_allocate(
                         * we dropped the ilock in the interim.
                         */
 
-                       end_fsb = XFS_B_TO_FSB(mp, ip->i_d.di_size);
+                       end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
                        xfs_bmap_last_offset(NULL, ip, &last_block,
                                XFS_DATA_FORK);
                        last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 3ce204a..df441ee 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -22,6 +22,7 @@
 
 
 typedef enum {                         /* iomap_flags values */
+       IOMAP_READ =            0,      /* mapping for a read */
        IOMAP_EOF =             0x01,   /* mapping contains EOF   */
        IOMAP_HOLE =            0x02,   /* mapping covers a hole  */
        IOMAP_DELAY =           0x04,   /* mapping covers delalloc region  */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6e49bd3..e17be3b 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -133,7 +133,7 @@ xfs_getattr(
        if (!(flags & ATTR_LAZY))
                xfs_ilock(ip, XFS_ILOCK_SHARED);
 
-       vap->va_size = ip->i_d.di_size;
+       vap->va_size = XFS_ISIZE(ip);
        if (vap->va_mask == XFS_AT_SIZE)
                goto all_done;
 
@@ -496,7 +496,7 @@ xfs_setattr(
        if (mask & XFS_AT_SIZE) {
                /* Short circuit the truncate case for zero length files */
                if ((vap->va_size == 0) &&
-                  (ip->i_d.di_size == 0) && (ip->i_d.di_nextents == 0)) {
+                  (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) {
                        xfs_iunlock(ip, XFS_ILOCK_EXCL);
                        lock_flags &= ~XFS_ILOCK_EXCL;
                        if (mask & XFS_AT_CTIME)
@@ -614,7 +614,7 @@ xfs_setattr(
         */
        if (mask & XFS_AT_SIZE) {
                code = 0;
-               if ((vap->va_size > ip->i_d.di_size) && 
+               if ((vap->va_size > ip->i_size) &&
                    (flags & ATTR_NOSIZETOK) == 0) {
                        code = xfs_igrow_start(ip, vap->va_size, credp);
                }
@@ -654,10 +654,10 @@ xfs_setattr(
         * Truncate file.  Must have write permission and not be a directory.
         */
        if (mask & XFS_AT_SIZE) {
-               if (vap->va_size > ip->i_d.di_size) {
+               if (vap->va_size > ip->i_size) {
                        xfs_igrow_finish(tp, ip, vap->va_size,
                            !(flags & ATTR_DMI));
-               } else if ((vap->va_size <= ip->i_d.di_size) ||
+               } else if ((vap->va_size <= ip->i_size) ||
                           ((vap->va_size == 0) && ip->i_d.di_nextents)) {
                        /*
                         * signal a sync transaction unless
@@ -1221,7 +1221,7 @@ xfs_inactive_free_eofblocks(
         * Figure out if there are any blocks beyond the end
         * of the file.  If not, then there is nothing to do.
         */
-       end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_d.di_size));
+       end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size));
        last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
        map_len = last_fsb - end_fsb;
        if (map_len <= 0)
@@ -1258,7 +1258,7 @@ xfs_inactive_free_eofblocks(
                 */
                xfs_ilock(ip, XFS_IOLOCK_EXCL);
                error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
-                                   ip->i_d.di_size);
+                                   ip->i_size);
                if (error) {
                        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
                        return error;
@@ -1282,7 +1282,7 @@ xfs_inactive_free_eofblocks(
                xfs_trans_ihold(tp, ip);
 
                error = xfs_itruncate_finish(&tp, ip,
-                                            ip->i_d.di_size,
+                                            ip->i_size,
                                             XFS_DATA_FORK,
                                             0);
                /*
@@ -1568,7 +1568,7 @@ xfs_release(
 
        if (ip->i_d.di_nlink != 0) {
                if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
-                    ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
+                    ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
                       ip->i_delayed_blks > 0)) &&
                     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
                    (!(ip->i_d.di_flags &
@@ -1629,8 +1629,8 @@ xfs_inactive(
         * only one with a reference to the inode.
         */
        truncate = ((ip->i_d.di_nlink == 0) &&
-            ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) ||
-             (ip->i_delayed_blks > 0)) &&
+           ((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
+            (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
            ((ip->i_d.di_mode & S_IFMT) == S_IFREG));
 
        mp = ip->i_mount;
@@ -1648,7 +1648,7 @@ xfs_inactive(
 
        if (ip->i_d.di_nlink != 0) {
                if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
-                     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
+                     ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
                        ip->i_delayed_blks > 0)) &&
                      (ip->i_df.if_flags & XFS_IFEXTENTS) &&
                     (!(ip->i_d.di_flags &
@@ -4055,14 +4055,14 @@ xfs_alloc_file_space(
        allocatesize_fsb = XFS_B_TO_FSB(mp, count);
 
        /*      Generate a DMAPI event if needed.       */
-       if (alloc_type != 0 && offset < ip->i_d.di_size &&
+       if (alloc_type != 0 && offset < ip->i_size &&
                        (attr_flags&ATTR_DMI) == 0  &&
                        DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) {
                xfs_off_t           end_dmi_offset;
 
                end_dmi_offset = offset+len;
-               if (end_dmi_offset > ip->i_d.di_size)
-                       end_dmi_offset = ip->i_d.di_size;
+               if (end_dmi_offset > ip->i_size)
+                       end_dmi_offset = ip->i_size;
                error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip),
                        offset, end_dmi_offset - offset,
                        0, NULL);
@@ -4318,11 +4318,11 @@ xfs_free_file_space(
        end_dmi_offset = offset + len;
        endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
 
-       if (offset < ip->i_d.di_size &&
+       if (offset < ip->i_size &&
            (attr_flags & ATTR_DMI) == 0 &&
            DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) {
-               if (end_dmi_offset > ip->i_d.di_size)
-                       end_dmi_offset = ip->i_d.di_size;
+               if (end_dmi_offset > ip->i_size)
+                       end_dmi_offset = ip->i_size;
                error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp,
                                offset, end_dmi_offset - offset,
                                AT_DELAY_FLAG(attr_flags), NULL);
@@ -4541,7 +4541,7 @@ xfs_change_file_space(
                bf->l_start += offset;
                break;
        case 2: /*SEEK_END*/
-               bf->l_start += ip->i_d.di_size;
+               bf->l_start += ip->i_size;
                break;
        default:
                return XFS_ERROR(EINVAL);
@@ -4558,7 +4558,7 @@ xfs_change_file_space(
        bf->l_whence = 0;
 
        startoffset = bf->l_start;
-       fsize = ip->i_d.di_size;
+       fsize = ip->i_size;
 
        /*
         * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[XFS] Fix to prevent the notorious 'NULL files' problem after a crash.

Reply via email to