[PATCH 06/14] loop: Split setting of lo_state from loop_clr_fd

2018-09-27 Thread Jan Kara
Move setting of lo_state to Lo_rundown out into the callers. That will
allow us to unlock loop_ctl_mutex while the loop device is protected
from other changes by its special state.

Signed-off-by: Jan Kara 
---
 drivers/block/loop.c | 52 +++-
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index a86ef20c15e2..51d11898e170 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -976,7 +976,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
loop_reread_partitions(lo, bdev);
 
/* Grab the block_device to prevent its destruction after we
-* put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev).
+* put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
 */
bdgrab(bdev);
return 0;
@@ -1026,31 +1026,15 @@ loop_init_xfer(struct loop_device *lo, struct 
loop_func_table *xfer,
return err;
 }
 
-static int loop_clr_fd(struct loop_device *lo)
+static int __loop_clr_fd(struct loop_device *lo)
 {
struct file *filp = lo->lo_backing_file;
gfp_t gfp = lo->old_gfp_mask;
struct block_device *bdev = lo->lo_device;
 
-   if (lo->lo_state != Lo_bound)
+   if (WARN_ON_ONCE(lo->lo_state != Lo_rundown))
return -ENXIO;
 
-   /*
-* If we've explicitly asked to tear down the loop device,
-* and it has an elevated reference count, set it for auto-teardown when
-* the last reference goes away. This stops $!~#$@ udev from
-* preventing teardown because it decided that it needs to run blkid on
-* the loopback device whenever they appear. xfstests is notorious for
-* failing tests because blkid via udev races with a losetup
-* /do something like mkfs/losetup -d  causing the losetup -d
-* command to fail with EBUSY.
-*/
-   if (atomic_read(>lo_refcnt) > 1) {
-   lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
-   mutex_unlock(_ctl_mutex);
-   return 0;
-   }
-
if (filp == NULL)
return -EINVAL;
 
@@ -1058,7 +1042,6 @@ static int loop_clr_fd(struct loop_device *lo)
blk_mq_freeze_queue(lo->lo_queue);
 
spin_lock_irq(>lo_lock);
-   lo->lo_state = Lo_rundown;
lo->lo_backing_file = NULL;
spin_unlock_irq(>lo_lock);
 
@@ -,6 +1094,30 @@ static int loop_clr_fd(struct loop_device *lo)
return 0;
 }
 
+static int loop_clr_fd(struct loop_device *lo)
+{
+   if (lo->lo_state != Lo_bound)
+   return -ENXIO;
+   /*
+* If we've explicitly asked to tear down the loop device,
+* and it has an elevated reference count, set it for auto-teardown when
+* the last reference goes away. This stops $!~#$@ udev from
+* preventing teardown because it decided that it needs to run blkid on
+* the loopback device whenever they appear. xfstests is notorious for
+* failing tests because blkid via udev races with a losetup
+* /do something like mkfs/losetup -d  causing the losetup -d
+* command to fail with EBUSY.
+*/
+   if (atomic_read(>lo_refcnt) > 1) {
+   lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
+   mutex_unlock(_ctl_mutex);
+   return 0;
+   }
+   lo->lo_state = Lo_rundown;
+
+   return __loop_clr_fd(lo);
+}
+
 static int
 loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 {
@@ -1692,11 +1699,14 @@ static void lo_release(struct gendisk *disk, fmode_t 
mode)
goto out_unlock;
 
if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
+   if (lo->lo_state != Lo_bound)
+   goto out_unlock;
+   lo->lo_state = Lo_rundown;
/*
 * In autoclear mode, stop the loop thread
 * and remove configuration after last close.
 */
-   err = loop_clr_fd(lo);
+   err = __loop_clr_fd(lo);
if (!err)
return;
} else if (lo->lo_state == Lo_bound) {
-- 
2.16.4



[PATCH 02/14] block/loop: Use global lock for ioctl() operation.

2018-09-27 Thread Jan Kara
From: Tetsuo Handa 

syzbot is reporting NULL pointer dereference [1] which is caused by
race condition between ioctl(loop_fd, LOOP_CLR_FD, 0) versus
ioctl(other_loop_fd, LOOP_SET_FD, loop_fd) due to traversing other
loop devices at loop_validate_file() without holding corresponding
lo->lo_ctl_mutex locks.

Since ioctl() request on loop devices is not frequent operation, we don't
need fine grained locking. Let's use global lock in order to allow safe
traversal at loop_validate_file().

Note that syzbot is also reporting circular locking dependency between
bdev->bd_mutex and lo->lo_ctl_mutex [2] which is caused by calling
blkdev_reread_part() with lock held. This patch does not address it.

[1] 
https://syzkaller.appspot.com/bug?id=f3cfe26e785d85f9ee259f385515291d21bd80a3
[2] 
https://syzkaller.appspot.com/bug?id=bf154052f0eea4bc7712499e4569505907d15889

Signed-off-by: Tetsuo Handa 
Reported-by: syzbot 
Reviewed-by: Jan Kara 
Signed-off-by: Jan Kara 
---
 drivers/block/loop.c | 58 ++--
 drivers/block/loop.h |  1 -
 2 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 50c81ff44ae2..d0f1b7106572 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -84,6 +84,7 @@
 
 static DEFINE_IDR(loop_index_idr);
 static DEFINE_MUTEX(loop_index_mutex);
+static DEFINE_MUTEX(loop_ctl_mutex);
 
 static int max_part;
 static int part_shift;
@@ -1047,7 +1048,7 @@ static int loop_clr_fd(struct loop_device *lo)
 */
if (atomic_read(>lo_refcnt) > 1) {
lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
-   mutex_unlock(>lo_ctl_mutex);
+   mutex_unlock(_ctl_mutex);
return 0;
}
 
@@ -1100,12 +1101,12 @@ static int loop_clr_fd(struct loop_device *lo)
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
loop_unprepare_queue(lo);
-   mutex_unlock(>lo_ctl_mutex);
+   mutex_unlock(_ctl_mutex);
/*
-* Need not hold lo_ctl_mutex to fput backing file.
-* Calling fput holding lo_ctl_mutex triggers a circular
+* Need not hold loop_ctl_mutex to fput backing file.
+* Calling fput holding loop_ctl_mutex triggers a circular
 * lock dependency possibility warning as fput can take
-* bd_mutex which is usually taken before lo_ctl_mutex.
+* bd_mutex which is usually taken before loop_ctl_mutex.
 */
fput(filp);
return 0;
@@ -1210,7 +1211,7 @@ loop_get_status(struct loop_device *lo, struct 
loop_info64 *info)
int ret;
 
if (lo->lo_state != Lo_bound) {
-   mutex_unlock(>lo_ctl_mutex);
+   mutex_unlock(_ctl_mutex);
return -ENXIO;
}
 
@@ -1229,10 +1230,10 @@ loop_get_status(struct loop_device *lo, struct 
loop_info64 *info)
   lo->lo_encrypt_key_size);
}
 
-   /* Drop lo_ctl_mutex while we call into the filesystem. */
+   /* Drop loop_ctl_mutex while we call into the filesystem. */
path = lo->lo_backing_file->f_path;
path_get();
-   mutex_unlock(>lo_ctl_mutex);
+   mutex_unlock(_ctl_mutex);
ret = vfs_getattr(, , STATX_INO, AT_STATX_SYNC_AS_STAT);
if (!ret) {
info->lo_device = huge_encode_dev(stat.dev);
@@ -1324,7 +1325,7 @@ loop_get_status_old(struct loop_device *lo, struct 
loop_info __user *arg) {
int err;
 
if (!arg) {
-   mutex_unlock(>lo_ctl_mutex);
+   mutex_unlock(_ctl_mutex);
return -EINVAL;
}
err = loop_get_status(lo, );
@@ -1342,7 +1343,7 @@ loop_get_status64(struct loop_device *lo, struct 
loop_info64 __user *arg) {
int err;
 
if (!arg) {
-   mutex_unlock(>lo_ctl_mutex);
+   mutex_unlock(_ctl_mutex);
return -EINVAL;
}
err = loop_get_status(lo, );
@@ -1400,7 +1401,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t 
mode,
struct loop_device *lo = bdev->bd_disk->private_data;
int err;
 
-   err = mutex_lock_killable_nested(>lo_ctl_mutex, 1);
+   err = mutex_lock_killable_nested(_ctl_mutex, 1);
if (err)
goto out_unlocked;
 
@@ -1412,7 +1413,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t 
mode,
err = loop_change_fd(lo, bdev, arg);
break;
case LOOP_CLR_FD:
-   /* loop_clr_fd would have unlocked lo_ctl_mutex on success */
+   /* loop_clr_fd would have unlocked loop_ctl_mutex on success */
err = loop_clr_fd(lo);
if (!err)
goto out_unlocked;
@@ -1425,7 +1426,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t 
mode,
break;
case LOOP_GET_STATUS:
   

[PATCH 01/14] block/loop: Don't grab "struct file" for vfs_getattr() operation.

2018-09-27 Thread Jan Kara
From: Tetsuo Handa 

vfs_getattr() needs "struct path" rather than "struct file".
Let's use path_get()/path_put() rather than get_file()/fput().

Signed-off-by: Tetsuo Handa 
Reviewed-by: Jan Kara 
Signed-off-by: Jan Kara 
---
 drivers/block/loop.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ea9debf59b22..50c81ff44ae2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1205,7 +1205,7 @@ loop_set_status(struct loop_device *lo, const struct 
loop_info64 *info)
 static int
 loop_get_status(struct loop_device *lo, struct loop_info64 *info)
 {
-   struct file *file;
+   struct path path;
struct kstat stat;
int ret;
 
@@ -1230,16 +1230,16 @@ loop_get_status(struct loop_device *lo, struct 
loop_info64 *info)
}
 
/* Drop lo_ctl_mutex while we call into the filesystem. */
-   file = get_file(lo->lo_backing_file);
+   path = lo->lo_backing_file->f_path;
+   path_get();
mutex_unlock(>lo_ctl_mutex);
-   ret = vfs_getattr(>f_path, , STATX_INO,
- AT_STATX_SYNC_AS_STAT);
+   ret = vfs_getattr(, , STATX_INO, AT_STATX_SYNC_AS_STAT);
if (!ret) {
info->lo_device = huge_encode_dev(stat.dev);
info->lo_inode = stat.ino;
info->lo_rdevice = huge_encode_dev(stat.rdev);
}
-   fput(file);
+   path_put();
return ret;
 }
 
-- 
2.16.4



[PATCH 09/14] loop: Push loop_ctl_mutex down to loop_set_status()

2018-09-27 Thread Jan Kara
Push loop_ctl_mutex down to loop_set_status(). We will need this to be
able to call loop_reread_partitions() without loop_ctl_mutex.

Signed-off-by: Jan Kara 
---
 drivers/block/loop.c | 51 +--
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 577d5e5a9312..1cc29bd77d67 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1142,46 +1142,55 @@ loop_set_status(struct loop_device *lo, const struct 
loop_info64 *info)
struct loop_func_table *xfer;
kuid_t uid = current_uid();
 
+   err = mutex_lock_killable_nested(_ctl_mutex, 1);
+   if (err)
+   return err;
if (lo->lo_encrypt_key_size &&
!uid_eq(lo->lo_key_owner, uid) &&
-   !capable(CAP_SYS_ADMIN))
-   return -EPERM;
-   if (lo->lo_state != Lo_bound)
-   return -ENXIO;
-   if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
-   return -EINVAL;
+   !capable(CAP_SYS_ADMIN)) {
+   err = -EPERM;
+   goto out_unlock;
+   }
+   if (lo->lo_state != Lo_bound) {
+   err = -ENXIO;
+   goto out_unlock;
+   }
+   if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) {
+   err = -EINVAL;
+   goto out_unlock;
+   }
 
/* I/O need to be drained during transfer transition */
blk_mq_freeze_queue(lo->lo_queue);
 
err = loop_release_xfer(lo);
if (err)
-   goto exit;
+   goto out_unfreeze;
 
if (info->lo_encrypt_type) {
unsigned int type = info->lo_encrypt_type;
 
if (type >= MAX_LO_CRYPT) {
err = -EINVAL;
-   goto exit;
+   goto out_unfreeze;
}
xfer = xfer_funcs[type];
if (xfer == NULL) {
err = -EINVAL;
-   goto exit;
+   goto out_unfreeze;
}
} else
xfer = NULL;
 
err = loop_init_xfer(lo, xfer, info);
if (err)
-   goto exit;
+   goto out_unfreeze;
 
if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
err = -EFBIG;
-   goto exit;
+   goto out_unfreeze;
}
}
 
@@ -1213,7 +1222,7 @@ loop_set_status(struct loop_device *lo, const struct 
loop_info64 *info)
/* update dio if lo_offset or transfer is changed */
__loop_update_dio(lo, lo->use_dio);
 
- exit:
+out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue);
 
if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) &&
@@ -1222,6 +1231,8 @@ loop_set_status(struct loop_device *lo, const struct 
loop_info64 *info)
lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
loop_reread_partitions(lo, lo->lo_device);
}
+out_unlock:
+   mutex_unlock(_ctl_mutex);
 
return err;
 }
@@ -1468,12 +1479,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t 
mode,
case LOOP_SET_STATUS:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
-   err = mutex_lock_killable_nested(_ctl_mutex, 1);
-   if (err)
-   return err;
err = loop_set_status_old(lo,
(struct loop_info __user *)arg);
-   mutex_unlock(_ctl_mutex);
}
break;
case LOOP_GET_STATUS:
@@ -1481,12 +1488,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t 
mode,
case LOOP_SET_STATUS64:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
-   err = mutex_lock_killable_nested(_ctl_mutex, 1);
-   if (err)
-   return err;
err = loop_set_status64(lo,
(struct loop_info64 __user *) arg);
-   mutex_unlock(_ctl_mutex);
}
break;
case LOOP_GET_STATUS64:
@@ -1631,12 +1634,8 @@ static int lo_compat_ioctl(struct block_device *bdev, 
fmode_t mode,
 
switch(cmd) {
case LOOP_SET_STATUS:
-   err = mutex_lock_killable(_ctl_mutex);
-   if (!err) {
-   err = loop_set_status_compat(lo,
-(const struct 
compat_loop_info __user *)a

[PATCH 10/14] loop: Push loop_ctl_mutex down to loop_set_fd()

2018-09-27 Thread Jan Kara
Push lo_ctl_mutex down to loop_set_fd(). We will need this to be able to
call loop_reread_partitions() without lo_ctl_mutex.

Signed-off-by: Jan Kara 
---
 drivers/block/loop.c | 26 ++
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 1cc29bd77d67..504e5ef07509 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -919,13 +919,17 @@ static int loop_set_fd(struct loop_device *lo, fmode_t 
mode,
if (!file)
goto out;
 
+   error = mutex_lock_killable_nested(_ctl_mutex, 1);
+   if (error)
+   goto out_putf;
+
error = -EBUSY;
if (lo->lo_state != Lo_unbound)
-   goto out_putf;
+   goto out_unlock;
 
error = loop_validate_file(file, bdev);
if (error)
-   goto out_putf;
+   goto out_unlock;
 
mapping = file->f_mapping;
inode = mapping->host;
@@ -937,10 +941,10 @@ static int loop_set_fd(struct loop_device *lo, fmode_t 
mode,
error = -EFBIG;
size = get_loop_size(lo, file);
if ((loff_t)(sector_t)size != size)
-   goto out_putf;
+   goto out_unlock;
error = loop_prepare_queue(lo);
if (error)
-   goto out_putf;
+   goto out_unlock;
 
error = 0;
 
@@ -979,11 +983,14 @@ static int loop_set_fd(struct loop_device *lo, fmode_t 
mode,
 * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
 */
bdgrab(bdev);
+   mutex_unlock(_ctl_mutex);
return 0;
 
- out_putf:
+out_unlock:
+   mutex_unlock(_ctl_mutex);
+out_putf:
fput(file);
- out:
+out:
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
return error;
@@ -1461,12 +1468,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t 
mode,
 
switch (cmd) {
case LOOP_SET_FD:
-   err = mutex_lock_killable_nested(_ctl_mutex, 1);
-   if (err)
-   return err;
-   err = loop_set_fd(lo, mode, bdev, arg);
-   mutex_unlock(_ctl_mutex);
-   break;
+   return loop_set_fd(lo, mode, bdev, arg);
case LOOP_CHANGE_FD:
err = mutex_lock_killable_nested(_ctl_mutex, 1);
if (err)
-- 
2.16.4



[PATCH 12/14] loop: Move special partition reread handling in loop_clr_fd()

2018-09-27 Thread Jan Kara
The call of __blkdev_reread_part() from loop_reread_partition() happens
only when we need to invalidate partitions from loop_release(). Thus
move a detection for this into loop_clr_fd() and simplify
loop_reread_partition().

This makes loop_reread_partition() safe to use without loop_ctl_mutex
because we use only lo->lo_number and lo->lo_file_name in case of error
for reporting purposes (thus possibly reporting outdate information is
not a big deal) and we are safe from 'lo' going away under us by
elevated lo->lo_refcnt.

Signed-off-by: Jan Kara 
---
 drivers/block/loop.c | 33 +++--
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b4ea862f14fd..cce1a32ae6d0 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -631,18 +631,7 @@ static void loop_reread_partitions(struct loop_device *lo,
 {
int rc;
 
-   /*
-* bd_mutex has been held already in release path, so don't
-* acquire it if this function is called in such case.
-*
-* If the reread partition isn't from release path, lo_refcnt
-* must be at least one and it can only become zero when the
-* current holder is released.
-*/
-   if (!atomic_read(>lo_refcnt))
-   rc = __blkdev_reread_part(bdev);
-   else
-   rc = blkdev_reread_part(bdev);
+   rc = blkdev_reread_part(bdev);
if (rc)
pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
__func__, lo->lo_number, lo->lo_file_name, rc);
@@ -1096,8 +1085,24 @@ static int __loop_clr_fd(struct loop_device *lo)
module_put(THIS_MODULE);
blk_mq_unfreeze_queue(lo->lo_queue);
 
-   if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
-   loop_reread_partitions(lo, bdev);
+   if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) {
+   /*
+* bd_mutex has been held already in release path, so don't
+* acquire it if this function is called in such case.
+*
+* If the reread partition isn't from release path, lo_refcnt
+* must be at least one and it can only become zero when the
+* current holder is released.
+*/
+   if (!atomic_read(>lo_refcnt))
+   err = __blkdev_reread_part(bdev);
+   else
+   err = blkdev_reread_part(bdev);
+   pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
+   __func__, lo->lo_number, err);
+   /* Device is gone, no point in returning error */
+   err = 0;
+   }
lo->lo_flags = 0;
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
-- 
2.16.4



[PATCH 11/14] loop: Push loop_ctl_mutex down to loop_change_fd()

2018-09-27 Thread Jan Kara
Push loop_ctl_mutex down to loop_change_fd(). We will need this to be
able to call loop_reread_partitions() without loop_ctl_mutex.

Signed-off-by: Jan Kara 
---
 drivers/block/loop.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 504e5ef07509..b4ea862f14fd 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -692,19 +692,22 @@ static int loop_change_fd(struct loop_device *lo, struct 
block_device *bdev,
struct file *file, *old_file;
int error;
 
+   error = mutex_lock_killable_nested(_ctl_mutex, 1);
+   if (error)
+   return error;
error = -ENXIO;
if (lo->lo_state != Lo_bound)
-   goto out;
+   goto out_unlock;
 
/* the loop device has to be read-only */
error = -EINVAL;
if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
-   goto out;
+   goto out_unlock;
 
error = -EBADF;
file = fget(arg);
if (!file)
-   goto out;
+   goto out_unlock;
 
error = loop_validate_file(file, bdev);
if (error)
@@ -731,11 +734,13 @@ static int loop_change_fd(struct loop_device *lo, struct 
block_device *bdev,
fput(old_file);
if (lo->lo_flags & LO_FLAGS_PARTSCAN)
loop_reread_partitions(lo, bdev);
+   mutex_unlock(_ctl_mutex);
return 0;
 
- out_putf:
+out_putf:
fput(file);
- out:
+out_unlock:
+   mutex_unlock(_ctl_mutex);
return error;
 }
 
@@ -1470,12 +1475,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t 
mode,
case LOOP_SET_FD:
return loop_set_fd(lo, mode, bdev, arg);
case LOOP_CHANGE_FD:
-   err = mutex_lock_killable_nested(_ctl_mutex, 1);
-   if (err)
-   return err;
-   err = loop_change_fd(lo, bdev, arg);
-   mutex_unlock(_ctl_mutex);
-   break;
+   return loop_change_fd(lo, bdev, arg);
case LOOP_CLR_FD:
return loop_clr_fd(lo);
case LOOP_SET_STATUS:
-- 
2.16.4



[PATCH 05/14] loop: Push lo_ctl_mutex down into individual ioctls

2018-09-27 Thread Jan Kara
Push acquisition of lo_ctl_mutex down into individual ioctl handling
branches. This is a preparatory step for pushing the lock down into
individual ioctl handling functions so that they can release the lock as
they need it. We also factor out some simple ioctl handlers that will
not need any special handling to reduce unnecessary code duplication.

Signed-off-by: Jan Kara 
---
 drivers/block/loop.c | 88 +---
 1 file changed, 63 insertions(+), 25 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index e35707fb8318..a86ef20c15e2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1394,70 +1394,108 @@ static int loop_set_block_size(struct loop_device *lo, 
unsigned long arg)
return 0;
 }
 
-static int lo_ioctl(struct block_device *bdev, fmode_t mode,
-   unsigned int cmd, unsigned long arg)
+static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
+  unsigned long arg)
 {
-   struct loop_device *lo = bdev->bd_disk->private_data;
int err;
 
err = mutex_lock_killable_nested(_ctl_mutex, 1);
if (err)
-   goto out_unlocked;
+   return err;
+   switch (cmd) {
+   case LOOP_SET_CAPACITY:
+   err = loop_set_capacity(lo);
+   break;
+   case LOOP_SET_DIRECT_IO:
+   err = loop_set_dio(lo, arg);
+   break;
+   case LOOP_SET_BLOCK_SIZE:
+   err = loop_set_block_size(lo, arg);
+   break;
+   default:
+   err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
+   }
+   mutex_unlock(_ctl_mutex);
+   return err;
+}
+
+static int lo_ioctl(struct block_device *bdev, fmode_t mode,
+   unsigned int cmd, unsigned long arg)
+{
+   struct loop_device *lo = bdev->bd_disk->private_data;
+   int err;
 
switch (cmd) {
case LOOP_SET_FD:
+   err = mutex_lock_killable_nested(_ctl_mutex, 1);
+   if (err)
+   return err;
err = loop_set_fd(lo, mode, bdev, arg);
+   mutex_unlock(_ctl_mutex);
break;
case LOOP_CHANGE_FD:
+   err = mutex_lock_killable_nested(_ctl_mutex, 1);
+   if (err)
+   return err;
err = loop_change_fd(lo, bdev, arg);
+   mutex_unlock(_ctl_mutex);
break;
case LOOP_CLR_FD:
+   err = mutex_lock_killable_nested(_ctl_mutex, 1);
+   if (err)
+   return err;
/* loop_clr_fd would have unlocked loop_ctl_mutex on success */
err = loop_clr_fd(lo);
-   if (!err)
-   goto out_unlocked;
+   if (err)
+   mutex_unlock(_ctl_mutex);
break;
case LOOP_SET_STATUS:
err = -EPERM;
-   if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
+   if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
+   err = mutex_lock_killable_nested(_ctl_mutex, 1);
+   if (err)
+   return err;
err = loop_set_status_old(lo,
(struct loop_info __user *)arg);
+   mutex_unlock(_ctl_mutex);
+   }
break;
case LOOP_GET_STATUS:
+   err = mutex_lock_killable_nested(_ctl_mutex, 1);
+   if (err)
+   return err;
err = loop_get_status_old(lo, (struct loop_info __user *) arg);
/* loop_get_status() unlocks loop_ctl_mutex */
-   goto out_unlocked;
+   break;
case LOOP_SET_STATUS64:
err = -EPERM;
-   if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
+   if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
+   err = mutex_lock_killable_nested(_ctl_mutex, 1);
+   if (err)
+   return err;
err = loop_set_status64(lo,
(struct loop_info64 __user *) arg);
+   mutex_unlock(_ctl_mutex);
+   }
break;
case LOOP_GET_STATUS64:
+   err = mutex_lock_killable_nested(_ctl_mutex, 1);
+   if (err)
+   return err;
err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
/* loop_get_status() unlocks loop_ctl_mutex */
-   goto out_unlocked;
-   case LOOP_SET_CAPACITY:
-   err = -EPERM;
-   if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
-   err = loop_set_capacity(lo);
break;
+ 

[PATCH 0/14] loop: Fix oops and possible deadlocks

2018-09-27 Thread Jan Kara
Hi,

this patch series fixes oops and possible deadlocks as reported by syzbot [1]
[2]. The second patch in the series (from Tetsuo) fixes the oops, the remaining
patches are cleaning up the locking in the loop driver so that we can in the
end reasonably easily switch to rereading partitions without holding mutex
protecting the loop device.

I have lightly tested the patches by creating, deleting, and modifying loop
devices but if there's some more comprehensive loopback device testsuite, I
can try running it. Review is welcome!

Honza

[1] 
https://syzkaller.appspot.com/bug?id=f3cfe26e785d85f9ee259f385515291d21bd80a3
[2] 
https://syzkaller.appspot.com/bug?id=bf154052f0eea4bc7712499e4569505907d15889



[PATCH 07/14] loop: Push loop_ctl_mutex down into loop_clr_fd()

2018-09-27 Thread Jan Kara
loop_clr_fd() has a weird locking convention that is expects
loop_ctl_mutex held, releases it on success and keeps it on failure.
Untangle the mess by moving locking of loop_ctl_mutex into
loop_clr_fd().

Signed-off-by: Jan Kara 
---
 drivers/block/loop.c | 49 +
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 51d11898e170..e4b82ca49286 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1028,15 +1028,22 @@ loop_init_xfer(struct loop_device *lo, struct 
loop_func_table *xfer,
 
 static int __loop_clr_fd(struct loop_device *lo)
 {
-   struct file *filp = lo->lo_backing_file;
+   struct file *filp = NULL;
gfp_t gfp = lo->old_gfp_mask;
struct block_device *bdev = lo->lo_device;
+   int err = 0;
 
-   if (WARN_ON_ONCE(lo->lo_state != Lo_rundown))
-   return -ENXIO;
+   mutex_lock(_ctl_mutex);
+   if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
+   err = -ENXIO;
+   goto out_unlock;
+   }
 
-   if (filp == NULL)
-   return -EINVAL;
+   filp = lo->lo_backing_file;
+   if (filp == NULL) {
+   err = -EINVAL;
+   goto out_unlock;
+   }
 
/* freeze request queue during the transition */
blk_mq_freeze_queue(lo->lo_queue);
@@ -1083,6 +1090,7 @@ static int __loop_clr_fd(struct loop_device *lo)
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
loop_unprepare_queue(lo);
+out_unlock:
mutex_unlock(_ctl_mutex);
/*
 * Need not hold loop_ctl_mutex to fput backing file.
@@ -1090,14 +1098,22 @@ static int __loop_clr_fd(struct loop_device *lo)
 * lock dependency possibility warning as fput can take
 * bd_mutex which is usually taken before loop_ctl_mutex.
 */
-   fput(filp);
-   return 0;
+   if (filp)
+   fput(filp);
+   return err;
 }
 
 static int loop_clr_fd(struct loop_device *lo)
 {
-   if (lo->lo_state != Lo_bound)
+   int err;
+
+   err = mutex_lock_killable_nested(_ctl_mutex, 1);
+   if (err)
+   return err;
+   if (lo->lo_state != Lo_bound) {
+   mutex_unlock(_ctl_mutex);
return -ENXIO;
+   }
/*
 * If we've explicitly asked to tear down the loop device,
 * and it has an elevated reference count, set it for auto-teardown when
@@ -1114,6 +1130,7 @@ static int loop_clr_fd(struct loop_device *lo)
return 0;
}
lo->lo_state = Lo_rundown;
+   mutex_unlock(_ctl_mutex);
 
return __loop_clr_fd(lo);
 }
@@ -1448,14 +1465,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t 
mode,
mutex_unlock(_ctl_mutex);
break;
case LOOP_CLR_FD:
-   err = mutex_lock_killable_nested(_ctl_mutex, 1);
-   if (err)
-   return err;
-   /* loop_clr_fd would have unlocked loop_ctl_mutex on success */
-   err = loop_clr_fd(lo);
-   if (err)
-   mutex_unlock(_ctl_mutex);
-   break;
+   return loop_clr_fd(lo);
case LOOP_SET_STATUS:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
@@ -1691,7 +1701,6 @@ static int lo_open(struct block_device *bdev, fmode_t 
mode)
 static void lo_release(struct gendisk *disk, fmode_t mode)
 {
struct loop_device *lo;
-   int err;
 
mutex_lock(_ctl_mutex);
lo = disk->private_data;
@@ -1702,13 +1711,13 @@ static void lo_release(struct gendisk *disk, fmode_t 
mode)
if (lo->lo_state != Lo_bound)
goto out_unlock;
lo->lo_state = Lo_rundown;
+   mutex_unlock(_ctl_mutex);
/*
 * In autoclear mode, stop the loop thread
 * and remove configuration after last close.
 */
-   err = __loop_clr_fd(lo);
-   if (!err)
-   return;
+   __loop_clr_fd(lo);
+   return;
} else if (lo->lo_state == Lo_bound) {
/*
 * Otherwise keep thread (if running) and config,
-- 
2.16.4



[PATCH 04/14] loop: Get rid of loop_index_mutex

2018-09-27 Thread Jan Kara
Now that loop_ctl_mutex is global, just get rid of loop_index_mutex as
there is no good reason to keep these two separate and it just
complicates the locking.

Signed-off-by: Jan Kara 
---
 drivers/block/loop.c | 38 ++
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index cc43d835fe6f..e35707fb8318 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -83,7 +83,6 @@
 #include 
 
 static DEFINE_IDR(loop_index_idr);
-static DEFINE_MUTEX(loop_index_mutex);
 static DEFINE_MUTEX(loop_ctl_mutex);
 
 static int max_part;
@@ -1627,9 +1626,11 @@ static int lo_compat_ioctl(struct block_device *bdev, 
fmode_t mode,
 static int lo_open(struct block_device *bdev, fmode_t mode)
 {
struct loop_device *lo;
-   int err = 0;
+   int err;
 
-   mutex_lock(_index_mutex);
+   err = mutex_lock_killable(_ctl_mutex);
+   if (err)
+   return err;
lo = bdev->bd_disk->private_data;
if (!lo) {
err = -ENXIO;
@@ -1638,7 +1639,7 @@ static int lo_open(struct block_device *bdev, fmode_t 
mode)
 
atomic_inc(>lo_refcnt);
 out:
-   mutex_unlock(_index_mutex);
+   mutex_unlock(_ctl_mutex);
return err;
 }
 
@@ -1647,12 +1648,11 @@ static void lo_release(struct gendisk *disk, fmode_t 
mode)
struct loop_device *lo;
int err;
 
-   mutex_lock(_index_mutex);
+   mutex_lock(_ctl_mutex);
lo = disk->private_data;
if (atomic_dec_return(>lo_refcnt))
-   goto unlock_index;
+   goto out_unlock;
 
-   mutex_lock(_ctl_mutex);
if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
/*
 * In autoclear mode, stop the loop thread
@@ -1660,7 +1660,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
 */
err = loop_clr_fd(lo);
if (!err)
-   goto unlock_index;
+   return;
} else if (lo->lo_state == Lo_bound) {
/*
 * Otherwise keep thread (if running) and config,
@@ -1670,9 +1670,8 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
blk_mq_unfreeze_queue(lo->lo_queue);
}
 
+out_unlock:
mutex_unlock(_ctl_mutex);
-unlock_index:
-   mutex_unlock(_index_mutex);
 }
 
 static const struct block_device_operations lo_fops = {
@@ -1973,7 +1972,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, 
void *data)
struct kobject *kobj;
int err;
 
-   mutex_lock(_index_mutex);
+   mutex_lock(_ctl_mutex);
err = loop_lookup(, MINOR(dev) >> part_shift);
if (err < 0)
err = loop_add(, MINOR(dev) >> part_shift);
@@ -1981,7 +1980,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, 
void *data)
kobj = NULL;
else
kobj = get_disk_and_module(lo->lo_disk);
-   mutex_unlock(_index_mutex);
+   mutex_unlock(_ctl_mutex);
 
*part = 0;
return kobj;
@@ -1993,7 +1992,10 @@ static long loop_control_ioctl(struct file *file, 
unsigned int cmd,
struct loop_device *lo;
int ret = -ENOSYS;
 
-   mutex_lock(_index_mutex);
+   ret = mutex_lock_killable(_ctl_mutex);
+   if (ret)
+   return ret;
+
switch (cmd) {
case LOOP_CTL_ADD:
ret = loop_lookup(, parm);
@@ -2007,9 +2009,6 @@ static long loop_control_ioctl(struct file *file, 
unsigned int cmd,
ret = loop_lookup(, parm);
if (ret < 0)
break;
-   ret = mutex_lock_killable(_ctl_mutex);
-   if (ret)
-   break;
if (lo->lo_state != Lo_unbound) {
ret = -EBUSY;
mutex_unlock(_ctl_mutex);
@@ -2021,7 +2020,6 @@ static long loop_control_ioctl(struct file *file, 
unsigned int cmd,
break;
}
lo->lo_disk->private_data = NULL;
-   mutex_unlock(_ctl_mutex);
idr_remove(_index_idr, lo->lo_number);
loop_remove(lo);
break;
@@ -2031,7 +2029,7 @@ static long loop_control_ioctl(struct file *file, 
unsigned int cmd,
break;
ret = loop_add(, -1);
}
-   mutex_unlock(_index_mutex);
+   mutex_unlock(_ctl_mutex);
 
return ret;
 }
@@ -2115,10 +2113,10 @@ static int __init loop_init(void)
  THIS_MODULE, loop_probe, NULL, NULL);
 
/* pre-create number of devices given by config or max_loop */
-   mutex_lock(_index_mutex);
+   mutex_lock(_ctl_mutex);
for (i = 0; i < nr; i++)
loop_add(, i);
-   mutex_unlock(_index_mutex);
+   mutex_unloc

[PATCH 13/14] loop: Move loop_reread_partitions() out of loop_ctl_mutex

2018-09-27 Thread Jan Kara
Calling loop_reread_partitions() under loop_ctl_mutex causes lockdep to
complain about circular lock dependency between bdev->bd_mutex and
lo->lo_ctl_mutex. The problem is that on loop device open or close
lo_open() and lo_release() get called with bdev->bd_mutex held and they
need to acquire loop_ctl_mutex. OTOH when loop_reread_partitions() is
called with loop_ctl_mutex held, it will call blkdev_reread_part() which
acquires bdev->bd_mutex. See syzbot report for details [1].

Move all calls of loop_rescan_partitions() out of loop_ctl_mutex to
avoid lockdep warning and fix deadlock possibility.

[1] https://syzkaller.appspot.com/bug?id=bf154052f0eea4bc7712499e4569505907d1588

Reported-by: syzbot 

Signed-off-by: Jan Kara 
---
 drivers/block/loop.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index cce1a32ae6d0..d2be85c48f03 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -680,6 +680,7 @@ static int loop_change_fd(struct loop_device *lo, struct 
block_device *bdev,
 {
struct file *file, *old_file;
int error;
+   boolpartscan;
 
error = mutex_lock_killable_nested(_ctl_mutex, 1);
if (error)
@@ -721,9 +722,10 @@ static int loop_change_fd(struct loop_device *lo, struct 
block_device *bdev,
blk_mq_unfreeze_queue(lo->lo_queue);
 
fput(old_file);
-   if (lo->lo_flags & LO_FLAGS_PARTSCAN)
-   loop_reread_partitions(lo, bdev);
+   partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
mutex_unlock(_ctl_mutex);
+   if (partscan)
+   loop_reread_partitions(lo, bdev);
return 0;
 
 out_putf:
@@ -904,6 +906,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
int lo_flags = 0;
int error;
loff_t  size;
+   boolpartscan;
 
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
@@ -970,14 +973,15 @@ static int loop_set_fd(struct loop_device *lo, fmode_t 
mode,
lo->lo_state = Lo_bound;
if (part_shift)
lo->lo_flags |= LO_FLAGS_PARTSCAN;
-   if (lo->lo_flags & LO_FLAGS_PARTSCAN)
-   loop_reread_partitions(lo, bdev);
+   partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
 
/* Grab the block_device to prevent its destruction after we
 * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
 */
bdgrab(bdev);
mutex_unlock(_ctl_mutex);
+   if (partscan)
+   loop_reread_partitions(lo, bdev);
return 0;
 
 out_unlock:
@@ -1158,6 +1162,8 @@ loop_set_status(struct loop_device *lo, const struct 
loop_info64 *info)
int err;
struct loop_func_table *xfer;
kuid_t uid = current_uid();
+   struct block_device *bdev;
+   bool partscan = false;
 
err = mutex_lock_killable_nested(_ctl_mutex, 1);
if (err)
@@ -1246,10 +1252,13 @@ loop_set_status(struct loop_device *lo, const struct 
loop_info64 *info)
 !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
lo->lo_flags |= LO_FLAGS_PARTSCAN;
lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
-   loop_reread_partitions(lo, lo->lo_device);
+   bdev = lo->lo_device;
+   partscan = true;
}
 out_unlock:
mutex_unlock(_ctl_mutex);
+   if (partscan)
+   loop_reread_partitions(lo, bdev);
 
return err;
 }
-- 
2.16.4



[PATCH 03/14] loop: Fold __loop_release into loop_release

2018-09-27 Thread Jan Kara
__loop_release() has a single call site. Fold it there. This is
currently not a huge win but it will make following replacement of
loop_index_mutex more obvious.

Signed-off-by: Jan Kara 
---
 drivers/block/loop.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d0f1b7106572..cc43d835fe6f 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1642,12 +1642,15 @@ static int lo_open(struct block_device *bdev, fmode_t 
mode)
return err;
 }
 
-static void __lo_release(struct loop_device *lo)
+static void lo_release(struct gendisk *disk, fmode_t mode)
 {
+   struct loop_device *lo;
int err;
 
+   mutex_lock(_index_mutex);
+   lo = disk->private_data;
if (atomic_dec_return(>lo_refcnt))
-   return;
+   goto unlock_index;
 
mutex_lock(_ctl_mutex);
if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
@@ -1657,7 +1660,7 @@ static void __lo_release(struct loop_device *lo)
 */
err = loop_clr_fd(lo);
if (!err)
-   return;
+   goto unlock_index;
} else if (lo->lo_state == Lo_bound) {
/*
 * Otherwise keep thread (if running) and config,
@@ -1668,12 +1671,7 @@ static void __lo_release(struct loop_device *lo)
}
 
mutex_unlock(_ctl_mutex);
-}
-
-static void lo_release(struct gendisk *disk, fmode_t mode)
-{
-   mutex_lock(_index_mutex);
-   __lo_release(disk->private_data);
+unlock_index:
mutex_unlock(_index_mutex);
 }
 
-- 
2.16.4



[PATCH 14/14] loop: Fix deadlock when calling blkdev_reread_part()

2018-09-27 Thread Jan Kara
Calling blkdev_reread_part() under loop_ctl_mutex causes lockdep to
complain about circular lock dependency between bdev->bd_mutex and
lo->lo_ctl_mutex. The problem is that on loop device open or close
lo_open() and lo_release() get called with bdev->bd_mutex held and they
need to acquire loop_ctl_mutex. OTOH when loop_reread_partitions() is
called with loop_ctl_mutex held, it will call blkdev_reread_part() which
acquires bdev->bd_mutex. See syzbot report for details [1].

Move call to blkdev_reread_part() in __loop_clr_fd() from under
loop_ctl_mutex to finish fixing of the lockdep warning and the possible
deadlock.

[1] https://syzkaller.appspot.com/bug?id=bf154052f0eea4bc7712499e4569505907d1588

Reported-by: syzbot 

Signed-off-by: Jan Kara 
---
 drivers/block/loop.c | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d2be85c48f03..a0fb7bf62b29 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1031,12 +1031,14 @@ loop_init_xfer(struct loop_device *lo, struct 
loop_func_table *xfer,
return err;
 }
 
-static int __loop_clr_fd(struct loop_device *lo)
+static int __loop_clr_fd(struct loop_device *lo, bool release)
 {
struct file *filp = NULL;
gfp_t gfp = lo->old_gfp_mask;
struct block_device *bdev = lo->lo_device;
int err = 0;
+   bool partscan = false;
+   int lo_number;
 
mutex_lock(_ctl_mutex);
if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
@@ -1089,7 +1091,15 @@ static int __loop_clr_fd(struct loop_device *lo)
module_put(THIS_MODULE);
blk_mq_unfreeze_queue(lo->lo_queue);
 
-   if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) {
+   partscan = lo->lo_flags & LO_FLAGS_PARTSCAN && bdev;
+   lo_number = lo->lo_number;
+   lo->lo_flags = 0;
+   if (!part_shift)
+   lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
+   loop_unprepare_queue(lo);
+out_unlock:
+   mutex_unlock(_ctl_mutex);
+   if (partscan) {
/*
 * bd_mutex has been held already in release path, so don't
 * acquire it if this function is called in such case.
@@ -1098,21 +1108,15 @@ static int __loop_clr_fd(struct loop_device *lo)
 * must be at least one and it can only become zero when the
 * current holder is released.
 */
-   if (!atomic_read(>lo_refcnt))
+   if (release)
err = __blkdev_reread_part(bdev);
else
err = blkdev_reread_part(bdev);
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
-   __func__, lo->lo_number, err);
+   __func__, lo_number, err);
/* Device is gone, no point in returning error */
err = 0;
}
-   lo->lo_flags = 0;
-   if (!part_shift)
-   lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
-   loop_unprepare_queue(lo);
-out_unlock:
-   mutex_unlock(_ctl_mutex);
/*
 * Need not hold loop_ctl_mutex to fput backing file.
 * Calling fput holding loop_ctl_mutex triggers a circular
@@ -1153,7 +1157,7 @@ static int loop_clr_fd(struct loop_device *lo)
lo->lo_state = Lo_rundown;
mutex_unlock(_ctl_mutex);
 
-   return __loop_clr_fd(lo);
+   return __loop_clr_fd(lo, false);
 }
 
 static int
@@ -1714,7 +1718,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
 * In autoclear mode, stop the loop thread
 * and remove configuration after last close.
 */
-   __loop_clr_fd(lo);
+   __loop_clr_fd(lo, true);
return;
} else if (lo->lo_state == Lo_bound) {
/*
-- 
2.16.4



[PATCH 08/14] loop: Push loop_ctl_mutex down to loop_get_status()

2018-09-27 Thread Jan Kara
Push loop_ctl_mutex down to loop_get_status() to avoid the unusual
convention that the function gets called with loop_ctl_mutex held and
releases it.

Signed-off-by: Jan Kara 
---
 drivers/block/loop.c | 37 ++---
 1 file changed, 10 insertions(+), 27 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index e4b82ca49286..577d5e5a9312 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1233,6 +1233,9 @@ loop_get_status(struct loop_device *lo, struct 
loop_info64 *info)
struct kstat stat;
int ret;
 
+   ret = mutex_lock_killable_nested(_ctl_mutex, 1);
+   if (ret)
+   return ret;
if (lo->lo_state != Lo_bound) {
mutex_unlock(_ctl_mutex);
return -ENXIO;
@@ -1347,10 +1350,8 @@ loop_get_status_old(struct loop_device *lo, struct 
loop_info __user *arg) {
struct loop_info64 info64;
int err;
 
-   if (!arg) {
-   mutex_unlock(_ctl_mutex);
+   if (!arg)
return -EINVAL;
-   }
err = loop_get_status(lo, );
if (!err)
err = loop_info64_to_old(, );
@@ -1365,10 +1366,8 @@ loop_get_status64(struct loop_device *lo, struct 
loop_info64 __user *arg) {
struct loop_info64 info64;
int err;
 
-   if (!arg) {
-   mutex_unlock(_ctl_mutex);
+   if (!arg)
return -EINVAL;
-   }
err = loop_get_status(lo, );
if (!err && copy_to_user(arg, , sizeof(info64)))
err = -EFAULT;
@@ -1478,12 +1477,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t 
mode,
}
break;
case LOOP_GET_STATUS:
-   err = mutex_lock_killable_nested(_ctl_mutex, 1);
-   if (err)
-   return err;
-   err = loop_get_status_old(lo, (struct loop_info __user *) arg);
-   /* loop_get_status() unlocks loop_ctl_mutex */
-   break;
+   return loop_get_status_old(lo, (struct loop_info __user *) arg);
case LOOP_SET_STATUS64:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
@@ -1496,12 +1490,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t 
mode,
}
break;
case LOOP_GET_STATUS64:
-   err = mutex_lock_killable_nested(_ctl_mutex, 1);
-   if (err)
-   return err;
-   err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
-   /* loop_get_status() unlocks loop_ctl_mutex */
-   break;
+   return loop_get_status64(lo, (struct loop_info64 __user *) arg);
case LOOP_SET_CAPACITY:
case LOOP_SET_DIRECT_IO:
case LOOP_SET_BLOCK_SIZE:
@@ -1626,10 +1615,8 @@ loop_get_status_compat(struct loop_device *lo,
struct loop_info64 info64;
int err;
 
-   if (!arg) {
-   mutex_unlock(_ctl_mutex);
+   if (!arg)
return -EINVAL;
-   }
err = loop_get_status(lo, );
if (!err)
err = loop_info64_to_compat(, arg);
@@ -1652,12 +1639,8 @@ static int lo_compat_ioctl(struct block_device *bdev, 
fmode_t mode,
}
break;
case LOOP_GET_STATUS:
-   err = mutex_lock_killable(_ctl_mutex);
-   if (!err) {
-   err = loop_get_status_compat(lo,
-(struct compat_loop_info 
__user *)arg);
-   /* loop_get_status() unlocks loop_ctl_mutex */
-   }
+   err = loop_get_status_compat(lo,
+(struct compat_loop_info __user *)arg);
break;
case LOOP_SET_CAPACITY:
case LOOP_CLR_FD:
-- 
2.16.4



Re: [PATCH 4/4] block/loop: Fix circular locking dependency at blkdev_reread_part().

2018-09-27 Thread Jan Kara
On Thu 27-09-18 20:35:27, Tetsuo Handa wrote:
> On 2018/09/27 20:27, Jan Kara wrote:
> > Hi,
> > 
> > On Wed 26-09-18 00:26:49, Tetsuo Handa wrote:
> >> syzbot is reporting circular locking dependency between bdev->bd_mutex
> >> and lo->lo_ctl_mutex [1] which is caused by calling blkdev_reread_part()
> >> with lock held. We need to drop lo->lo_ctl_mutex in order to fix it.
> >>
> >> This patch fixes it by combining loop_index_mutex and loop_ctl_mutex into
> >> loop_mutex, and releasing loop_mutex before calling blkdev_reread_part()
> >> or fput() or path_put() or leaving ioctl().
> >>
> >> The rule is that current thread calls lock_loop() before accessing
> >> "struct loop_device", and current thread no longer accesses "struct
> >> loop_device" after unlock_loop() is called.
> >>
> >> Since syzbot is reporting various bugs [2] where a race in the loop module
> >> is suspected, let's check whether this patch affects these bugs too.
> >>
> >> [1] 
> >> https://syzkaller.appspot.com/bug?id=bf154052f0eea4bc7712499e4569505907d15889
> >> [2] 
> >> https://syzkaller.appspot.com/bug?id=b3c7e1440aa8ece16bf557dbac427fdff1dad9d6
> >>
> >> Signed-off-by: Tetsuo Handa 
> >> Reported-by: syzbot 
> >> 
> >> ---
> >>  drivers/block/loop.c | 187 
> >> ---
> >>  1 file changed, 101 insertions(+), 86 deletions(-)
> > 
> > I still don't like this patch. I'll post a patch series showing what I have
> > in mind. Admittedly, it's a bit tedious but the locking is much saner
> > afterwards...
> 
> Please be sure to Cc: me. I'm not subscribed to linux-block ML.

Yes, I've CCed you.

> But if we have to release lo_ctl_mutex before calling blkdev_reread_part(),
> what is nice with re-acquiring lo_ctl_mutex after blkdev_reread_part() ?

We don't reacquire the mutex after blkdev_reread_part(). Just the code
needed to be cleaned up so that loop_reread_part() does not need
lo_ctl_mutex for anything.

Honza

-- 
Jan Kara 
SUSE Labs, CR


Re: [PATCH 8/8] aio: support for IO polling

2018-11-22 Thread Jan Kara


On Tue 20-11-18 10:19:53, Jens Axboe wrote:
> +/*
> + * We can't just wait for polled events to come to us, we have to actively
> + * find and complete them.
> + */
> +static void aio_iopoll_reap_events(struct kioctx *ctx)
> +{
> + if (!(ctx->flags & IOCTX_FLAG_IOPOLL))
> + return;
> +
> + while (!list_empty_careful(>poll_submitted) ||
> +!list_empty(>poll_completing)) {
> + unsigned int nr_events = 0;
> +
> + __aio_iopoll_check(ctx, NULL, _events, 1, UINT_MAX);
> + }
> +}
> +
> +static int aio_iopoll_check(struct kioctx *ctx, long min_nr, long nr,
> + struct io_event __user *event)
> +{
> + unsigned int nr_events = 0;
> + int ret = 0;
> +
> + /* * Only allow one thread polling at a time */
> + if (test_and_set_bit(0, >getevents_busy))
> + return -EBUSY;
> +
> + while (!nr_events || !need_resched()) {
> + int tmin = 0;
> +
> + if (nr_events < min_nr)
> + tmin = min_nr - nr_events;
> +
> + ret = __aio_iopoll_check(ctx, event, _events, tmin, nr);
> + if (ret <= 0)
> + break;
> + ret = 0;
> + }
> +
> + clear_bit(0, >getevents_busy);
> + return nr_events ? nr_events : ret;
> +}

Hum, what if userspace calls io_destroy() while another process is polling
for events on the same kioctx? It seems we'd be reaping events from two
processes in parallel in that case which will result in various
"interesting" effects like ctx->poll_completing list corruption...

Honza
-- 
Jan Kara 
SUSE Labs, CR


<    1   2   3   4   5