tree f301f581dd4389028f8b2588940d456904e552f1
parent 2e8f68c45925123d33d476ce369b570bd989dd9a
author Larry Woodman <[EMAIL PROTECTED]> Fri, 15 Jul 2005 19:32:08 -0400
committer Marcelo Tosatti <[EMAIL PROTECTED]> Tue, 26 Jul 2005 13:52:46 -0300

[PATCH] workaround inode cache (prune_icache/__refile_inode) SMP races

Over the past couple of weeks we have seen two races in the inode cache
code. The first is between [dispose_list()] and __refile_inode() and the
second is between prune_icache() and truncate_inodes(). I posted both of
these patches but wanted to make sure they got properly reviewed and
included in RHEL3-U6.

Fixes bug 155289.

The first scenerio is:

1.) cpu0 is in __sync_one() just about to call __refile_inode() after
taking the inode_lock and clearing I_LOCK.

spin_lock(&inode_lock);
inode->i_state &= ~I_LOCK;
if (!(inode->i_state & I_FREEING))
__refile_inode(inode);
wake_up(&inode->i_wait);

2.) cpu1 is in [dispose_list()] where it has dropped the inode_lock and calls
clear_inode(). It doesnt block because
I_LOCK is clear so it sets the inode state.

void clear_inode(struct inode *inode)
{
..
wait_on_inode(inode);
..
inode->i_state = I_CLEAR;
..
}

3.) cpu0 calls __refile_inode which places is on one of the four
possible inode lists

static inline void __refile_inode(struct inode *inode)
{
        if (inode->i_state & I_DIRTY)
                to = &inode->i_sb->s_dirty;
        else if (atomic_read(&inode->i_count))
                to = &inode_in_use;
        else if (inode->i_data.nrpages)
                to = &inode_unused_pagecache;
        else
                to = &inode_unused;

        list_del(&inode->i_list);
        list_add(&inode->i_list, to);
}

4.) cpu1 returns from clear_inode() then calls destroy_inode() which
kmem_cache_free()s it.

static void destroy_inode(struct inode *inode)
{

        if (inode->i_sb->s_op->destroy_inode)
                inode->i_sb->s_op->destroy_inode(inode);
        else
                kmem_cache_free(inode_cachep, inode);
}

5.) at this point we have an inode that has been kmem_cache_free()'d
that is also sitting one of the lists determined by __refile_inode(),
that cant be good!!! Also, the code looks the same in RHEL4.

The second scenerio is:

CPU0 is in prune_icache() called by kswapd and CPU1 is in
invalidate_inodes() called by the auto-mount daemon.

1.) CPU0: prune_icache() sets the I_LOCK bit in an inode on the
inode_unused_pagecache list, releases the inode_lock and calls
invalidate_inode_pages.

2.) CPU1: invalidate_inodes() calls invalidate_list() for the
inode_unused_pagecache list with the node_lock held and sets the
I_FREEING bit in the inode->i_state.

3.) CPU0: prune_icache() acquires the inode_lock and clears the I_LOCK
bit in the inode->i_state.

4.) CPU1: dispose_list() calls clear_inode() without the inode_lock
held. Since the I_LOCK bit is clear, clear_inode() sets inode->i_state =
I_CLEAR, clearing the I_FREEING bit.

5.) CPU0: prune_icache() calls __refile_inode() because clear_inode()
cleared I_FREEING without holding the inode_lock. This inode that is no
longer on the inode_unused_pagecache list which results in that inode
being placed on the inode_unused list.

6.) CPU1: dispose_list() calls destroy_inode() which kmem_cache_free()s
an inode that is also on the inode_unused list.

At this point there is an inode that has been kmem_cache_free()'d and is
also on the inode_unused list.

This patch to clear_inode() acquires the inode_lock before manipulating
the inode->i_state field. This is the only place in the kernel that
manipulates the inode without holding the inode_lock.

 fs/inode.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/inode.c b/fs/inode.c
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -297,7 +297,7 @@ static inline void __refile_inode(struct
 {
        struct list_head *to;
 
-       if (inode->i_state & I_FREEING)
+       if (inode->i_state & (I_FREEING|I_CLEAR))
                return;
        if (list_empty(&inode->i_hash))
                return;
@@ -634,7 +634,9 @@ void clear_inode(struct inode *inode)
                cdput(inode->i_cdev);
                inode->i_cdev = NULL;
        }
+       spin_lock(&inode_lock);
        inode->i_state = I_CLEAR;
+       spin_unlock(&inode_lock);
 }
 
 /*
-
To unsubscribe from this list: send the line "unsubscribe git-commits-24" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to