This patch is another attempt at fixing the race between mount and umount
resetting the blocksize, which causes buffer errors, infinite loops in
__getblk_slow, and possibly other, as yet undiscovered, effects.

It adds optional flags to bd_claim (through a new __bd_claim; bd_claim itself
becomes a wrapper that passes no flags) so that the caller can request
exclusive access and/or wait until the device becomes available. Since bd_claim
already allows or denies access based on the holder, the BD_EXCL flag operates
under the assumption that all callers of bd_claim for a particular holder will
use the flag. This is currently true. BD_WAIT places the request on a wait
queue until access can be granted. It uses a global wait queue, which isn't a
contention point since bd_claim and bd_release both operate under the global
bdev_lock anyway.

Filesystems (via get_sb_bdev) now use BD_EXCL|BD_WAIT to ensure the previous
mount has completely shut down and closed the device before re-opening it
for a new mount. 

It's ugly, and I'm open to suggestions, but it seems to be the only way to
stop this race reliably.

Signed-off-by: Jeff Mahoney <[EMAIL PROTECTED]>

diff -ruNpX dontdiff linux-2.6.11/fs/block_dev.c linux-2.6.11.bs/fs/block_dev.c
--- linux-2.6.11/fs/block_dev.c 2005-03-14 21:25:20.000000000 -0500
+++ linux-2.6.11.bs/fs/block_dev.c      2005-03-14 22:17:16.000000000 -0500
@@ -238,6 +238,7 @@ static int block_fsync(struct file *filp
  */
 
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
+static DECLARE_WAIT_QUEUE_HEAD(bdev_wq);
 static kmem_cache_t * bdev_cachep;
 
 static struct inode *bdev_alloc_inode(struct super_block *sb)
@@ -443,20 +444,33 @@ void bd_forget(struct inode *inode)
        spin_unlock(&bdev_lock);
 }
 
-int bd_claim(struct block_device *bdev, void *holder)
+/*
+ * flags:
+ * BD_NONE: No special behavior.
+ * BD_EXCL: Must have sole access to device, even if holder is the same.
+ *          This is really enforced by the holder always using BD_EXCL.
+ * BD_WAIT: Wait until access is available before returning.
+ */
+int __bd_claim(struct block_device *bdev, void *holder, int flags)
 {
        int res;
+       DEFINE_WAIT (wait);
+
+retry:
        spin_lock(&bdev_lock);
+       prepare_to_wait (&bdev_wq, &wait, TASK_UNINTERRUPTIBLE);
 
        /* first decide result */
-       if (bdev->bd_holder == holder)
+       if (bdev->bd_holder == holder) {
                res = 0;         /* already a holder */
-       else if (bdev->bd_holder != NULL)
+               if (flags & BD_EXCL)
+                       res = -EBUSY;
+       } else if (bdev->bd_holder != NULL)
                res = -EBUSY;    /* held by someone else */
        else if (bdev->bd_contains == bdev)
                res = 0;         /* is a whole device which isn't held */
 
-       else if (bdev->bd_contains->bd_holder == bd_claim)
+       else if (bdev->bd_contains->bd_holder == __bd_claim)
                res = 0;         /* is a partition of a device that is being partitioned */
        else if (bdev->bd_contains->bd_holder != NULL)
                res = -EBUSY;    /* is a partition of a held device */
@@ -470,15 +484,21 @@ int bd_claim(struct block_device *bdev, 
                 * be set to bd_claim before being set to holder
                 */
                bdev->bd_contains->bd_holders ++;
-               bdev->bd_contains->bd_holder = bd_claim;
+               bdev->bd_contains->bd_holder = __bd_claim;
                bdev->bd_holders++;
                bdev->bd_holder = holder;
+       } else if (flags & BD_WAIT) {
+               spin_unlock (&bdev_lock);
+               schedule();
+               goto retry;
        }
+
+       finish_wait (&bdev_wq, &wait);
        spin_unlock(&bdev_lock);
        return res;
 }
 
-EXPORT_SYMBOL(bd_claim);
+EXPORT_SYMBOL(__bd_claim);
 
 void bd_release(struct block_device *bdev)
 {
@@ -488,6 +508,7 @@ void bd_release(struct block_device *bde
        if (!--bdev->bd_holders)
                bdev->bd_holder = NULL;
        spin_unlock(&bdev_lock);
+       wake_up_all (&bdev_wq);
 }
 
 EXPORT_SYMBOL(bd_release);
@@ -876,7 +897,8 @@ fail:
  * Open the blockdevice described by the special file at @path, claim it
  * for the @holder.
  */
-struct block_device *open_bdev_excl(const char *path, int flags, void *holder)
+struct block_device *__open_bdev_excl(const char *path, int flags,
+                                      void *holder, int bdflags)
 {
        struct block_device *bdev;
        mode_t mode = FMODE_READ;
@@ -894,7 +916,7 @@ struct block_device *open_bdev_excl(cons
        error = -EACCES;
        if (!(flags & MS_RDONLY) && bdev_read_only(bdev))
                goto blkdev_put;
-       error = bd_claim(bdev, holder);
+       error = __bd_claim(bdev, holder, bdflags);
        if (error)
                goto blkdev_put;
 
@@ -905,7 +927,7 @@ blkdev_put:
        return ERR_PTR(error);
 }
 
-EXPORT_SYMBOL(open_bdev_excl);
+EXPORT_SYMBOL(__open_bdev_excl);
 
 /**
  * close_bdev_excl  -  release a blockdevice openen by open_bdev_excl()
diff -ruNpX dontdiff linux-2.6.11/fs/super.c linux-2.6.11.bs/fs/super.c
--- linux-2.6.11/fs/super.c     2005-03-14 21:25:20.000000000 -0500
+++ linux-2.6.11.bs/fs/super.c  2005-03-14 21:38:22.000000000 -0500
@@ -677,7 +677,7 @@ struct super_block *get_sb_bdev(struct f
        struct super_block *s;
        int error = 0;
 
-       bdev = open_bdev_excl(dev_name, flags, fs_type);
+       bdev = __open_bdev_excl(dev_name, flags, fs_type, BD_EXCL|BD_WAIT);
        if (IS_ERR(bdev))
                return (struct super_block *)bdev;
 
diff -ruNpX dontdiff linux-2.6.11/include/linux/fs.h linux-2.6.11.bs/include/linux/fs.h
--- linux-2.6.11/include/linux/fs.h     2005-03-14 21:25:20.000000000 -0500
+++ linux-2.6.11.bs/include/linux/fs.h  2005-03-14 21:45:21.000000000 -0500
@@ -1320,7 +1320,14 @@ extern int blkdev_ioctl(struct inode *, 
 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
 extern int blkdev_get(struct block_device *, mode_t, unsigned);
 extern int blkdev_put(struct block_device *);
-extern int bd_claim(struct block_device *, void *);
+#define BD_NONE 0x0
+#define BD_EXCL 0x1
+#define BD_WAIT 0x2
+extern int __bd_claim(struct block_device *, void *, int);
+static inline int bd_claim(struct block_device *bdev, void *holder)
+{
+       return __bd_claim(bdev, holder, 0);
+}
 extern void bd_release(struct block_device *);
 
 /* fs/char_dev.c */
@@ -1337,6 +1344,11 @@ extern int chrdev_open(struct inode *, s
 extern const char *__bdevname(dev_t, char *buffer);
 extern const char *bdevname(struct block_device *bdev, char *buffer);
 extern struct block_device *lookup_bdev(const char *);
+extern struct block_device *__open_bdev_excl(const char *, int, void *, int);
+static inline struct block_device *open_bdev_excl(const char *path, int flags, void *holder)
+{
+       return __open_bdev_excl(path, flags, holder, BD_NONE);
+}
 extern struct block_device *open_bdev_excl(const char *, int, void *);
 extern void close_bdev_excl(struct block_device *);
 
-- 
Jeff Mahoney
SuSE Labs
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to