This patch changes all cases where the MS_ACTIVE bit gets set. This is
done to eliminate a race condition that can occur if an inode is
allocated and then released (using iput) during the ->fill_super
functions. The race condition is between kswapd and mount.

For most filesystems this can only happen in an error path when kswapd
is running concurrently. For isofs, however, the error can occur in a
more common code path (which is how the bug was found).

The changes are fairly straight forward. There are a couple of unique
cases:

ext3_orphan_cleanup() was setting MS_ACTIVE for the CONFIG_QUOTA case.
Since ext3_orphan_cleanup() is only called by ext3_fill_super(), this
is no longer necessary since the MS_ACTIVE will already be set.

Reiserfs:
For the CONFIG_QUOTA case, finish_unfinished() ensures that MS_ACTIVE
is set, and then conditionally turns it off depending on its initial
state.  finish_unfinished() is called by two functions: reiserfs_remount
and reiserfs_fill_super. The MS_ACTIVE flag was previously NOT set for
reiserfs_fill_super, but that has changed now - it will always be
set. The MS_ACTIVE flag is also set in the reiserfs_remount case, so
the manipulation of the MS_ACTIVE in finish_unfinished() is no longer
necessary.

Comments?

- Russ

--- linux-2.6.12-rc1/fs/afs/super.c     2005-03-17 17:34:09.000000000 -0800
+++ linux-2.6.12-rc1-fix2/fs/afs/super.c        2005-04-01 16:31:54.803904510
-0800
@@ -339,15 +339,15 @@ static struct super_block *afs_get_sb(st
        if (IS_ERR(sb))
                goto error;
 
-       sb->s_flags = flags;
+       sb->s_flags = flags | MS_ACTIVE;
 
        ret = afs_fill_super(sb, &params, flags & MS_VERBOSE ? 1 : 0);
        if (ret < 0) {
+               sb->s_flags &= ~MS_ACTIVE;
                up_write(&sb->s_umount);
                deactivate_super(sb);
                goto error;
        }
-       sb->s_flags |= MS_ACTIVE;
 
        afs_put_volume(params.volume);
        afs_put_cell(params.default_cell);
--- linux-2.6.12-rc1/fs/cifs/cifsfs.c   2005-03-17 17:34:13.000000000
-0800
+++ linux-2.6.12-rc1-fix2/fs/cifs/cifsfs.c      2005-04-01 16:14:26.000000000
-0800
@@ -422,15 +422,15 @@ cifs_get_sb(struct file_system_type *fs_
        if (IS_ERR(sb))
                return sb;
 
-       sb->s_flags = flags;
+       sb->s_flags = flags | MS_ACTIVE;
 
        rc = cifs_read_super(sb, data, dev_name, flags & MS_VERBOSE ? 1 : 0);
        if (rc) {
+               sb->s_flags &= ~MS_ACTIVE;
                up_write(&sb->s_umount);
                deactivate_super(sb);
                return ERR_PTR(rc);
        }
-       sb->s_flags |= MS_ACTIVE;
        return sb;
 }
 
--- linux-2.6.12-rc1/fs/ext3/super.c    2005-03-17 17:34:57.000000000 -0800
+++ linux-2.6.12-rc1-fix2/fs/ext3/super.c       2005-04-01 16:17:05.000000000
-0800
@@ -1121,8 +1121,6 @@ static void ext3_orphan_cleanup (struct 
                sb->s_flags &= ~MS_RDONLY;
        }
 #ifdef CONFIG_QUOTA
-       /* Needed for iput() to work correctly and not trash data */
-       sb->s_flags |= MS_ACTIVE;
        /* Turn on quotas so that they are updated correctly */
        for (i = 0; i < MAXQUOTAS; i++) {
                if (EXT3_SB(sb)->s_qf_names[i]) {
--- linux-2.6.12-rc1/fs/jffs2/super.c   2005-03-17 17:34:08.000000000
-0800
+++ linux-2.6.12-rc1-fix2/fs/jffs2/super.c      2005-04-01 15:41:06.000000000
-0800
@@ -141,18 +141,18 @@ static struct super_block *jffs2_get_sb_
                  mtd->index, mtd->name));
 
        sb->s_op = &jffs2_super_operations;
-       sb->s_flags = flags | MS_NOATIME;
+       sb->s_flags = flags | MS_NOATIME | MS_ACTIVE;
 
        ret = jffs2_do_fill_super(sb, data, (flags&MS_VERBOSE)?1:0);
 
        if (ret) {
                /* Failure case... */
+               sb->s_flags &= ~MS_ACTIVE;
                up_write(&sb->s_umount);
                deactivate_super(sb);
                return ERR_PTR(ret);
        }
 
-       sb->s_flags |= MS_ACTIVE;
        return sb;
 
  out_put:
--- linux-2.6.12-rc1/fs/libfs.c 2005-03-17 17:33:48.000000000 -0800
+++ linux-2.6.12-rc1-fix2/fs/libfs.c    2005-04-01 16:20:30.000000000 -0800
@@ -206,7 +206,7 @@ get_sb_pseudo(struct file_system_type *f
        if (IS_ERR(s))
                return s;
 
-       s->s_flags = MS_NOUSER;
+       s->s_flags = MS_NOUSER | MS_ACTIVE;
        s->s_maxbytes = ~0ULL;
        s->s_blocksize = 1024;
        s->s_blocksize_bits = 10;
@@ -228,10 +228,10 @@ get_sb_pseudo(struct file_system_type *f
        dentry->d_parent = dentry;
        d_instantiate(dentry, root);
        s->s_root = dentry;
-       s->s_flags |= MS_ACTIVE;
        return s;
 
 Enomem:
+       s->s_flags &= ~MS_ACTIVE;
        up_write(&s->s_umount);
        deactivate_super(s);
        return ERR_PTR(-ENOMEM);
--- linux-2.6.12-rc1/fs/nfs/inode.c     2005-03-17 17:34:27.000000000 -0800
+++ linux-2.6.12-rc1-fix2/fs/nfs/inode.c        2005-04-01 15:34:39.000000000
-0800
@@ -1445,7 +1445,7 @@ static struct super_block *nfs_get_sb(st
                return s;
        }
 
-       s->s_flags = flags;
+       s->s_flags = flags | MS_ACTIVE;
 
        /* Fire up rpciod if not yet running */
        if (rpciod_up() != 0) {
@@ -1456,11 +1456,11 @@ static struct super_block *nfs_get_sb(st
 
        error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
        if (error) {
+               s->s_flags &= ~MS_ACTIVE;
                up_write(&s->s_umount);
                deactivate_super(s);
                return ERR_PTR(error);
        }
-       s->s_flags |= MS_ACTIVE;
        return s;
 }
 
@@ -1783,7 +1783,7 @@ static struct super_block *nfs4_get_sb(s
        if (IS_ERR(s) || s->s_root)
                goto out_free;
 
-       s->s_flags = flags;
+       s->s_flags = flags | MS_ACTIVE;
 
        /* Fire up rpciod if not yet running */
        if (rpciod_up() != 0) {
@@ -1794,11 +1794,11 @@ static struct super_block *nfs4_get_sb(s
 
        error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
        if (error) {
+               s->s_flags &= ~MS_ACTIVE;
                up_write(&s->s_umount);
                deactivate_super(s);
                return ERR_PTR(error);
        }
-       s->s_flags |= MS_ACTIVE;
        return s;
 out_err:
        s = (struct super_block *)p;
--- linux-2.6.12-rc1/fs/reiserfs/super.c        2005-03-17 17:34:10.000000000
-0800
+++ linux-2.6.12-rc1-fix2/fs/reiserfs/super.c   2005-04-01
17:10:03.844698882 -0800
@@ -158,7 +158,6 @@ static int finish_unfinished (struct sup
     int truncate;
 #ifdef CONFIG_QUOTA
     int i;
-    int ms_active_set;
 #endif
  
  
@@ -168,13 +167,6 @@ static int finish_unfinished (struct sup
     max_cpu_key.key_length = 3;
 
 #ifdef CONFIG_QUOTA
-    /* Needed for iput() to work correctly and not trash data */
-    if (s->s_flags & MS_ACTIVE) {
-           ms_active_set = 0;
-    } else {
-           ms_active_set = 1;
-           s->s_flags |= MS_ACTIVE;
-    }
     /* Turn on quotas so that they are updated correctly */
     for (i = 0; i < MAXQUOTAS; i++) {
        if (REISERFS_SB(s)->s_qf_names[i]) {
@@ -282,9 +274,6 @@ static int finish_unfinished (struct sup
             if (sb_dqopt(s)->files[i])
                     vfs_quota_off_mount(s, i);
     }
-    if (ms_active_set)
-           /* Restore the flag back */
-           s->s_flags &= ~MS_ACTIVE;
 #endif
     pathrelse (&path);
     if (done)
--- linux-2.6.12-rc1/fs/super.c 2005-03-17 17:34:10.000000000 -0800
+++ linux-2.6.12-rc1-fix2/fs/super.c    2005-04-01 15:27:52.000000000 -0800
@@ -702,17 +702,17 @@ struct super_block *get_sb_bdev(struct f
        } else {
                char b[BDEVNAME_SIZE];
 
-               s->s_flags = flags;
+               s->s_flags = flags | MS_ACTIVE;
                strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
                s->s_old_blocksize = block_size(bdev);
                sb_set_blocksize(s, s->s_old_blocksize);
                error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
                if (error) {
+                       s->s_flags &= ~MS_ACTIVE;
                        up_write(&s->s_umount);
                        deactivate_super(s);
                        s = ERR_PTR(error);
                } else {
-                       s->s_flags |= MS_ACTIVE;
                        bdev_uevent(bdev, KOBJ_MOUNT);
                }
        }
@@ -748,15 +748,15 @@ struct super_block *get_sb_nodev(struct 
        if (IS_ERR(s))
                return s;
 
-       s->s_flags = flags;
+       s->s_flags = flags | MS_ACTIVE;
 
        error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
        if (error) {
+               s->s_flags &= ~MS_ACTIVE;
                up_write(&s->s_umount);
                deactivate_super(s);
                return ERR_PTR(error);
        }
-       s->s_flags |= MS_ACTIVE;
        return s;
 }
 
@@ -778,14 +778,14 @@ struct super_block *get_sb_single(struct
        if (IS_ERR(s))
                return s;
        if (!s->s_root) {
-               s->s_flags = flags;
+               s->s_flags = flags | MS_ACTIVE;
                error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
                if (error) {
+                       s->s_flags &= ~MS_ACTIVE;
                        up_write(&s->s_umount);
                        deactivate_super(s);
                        return ERR_PTR(error);
                }
-               s->s_flags |= MS_ACTIVE;
        }
        do_remount_sb(s, flags, data, 0);
        return s;

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to