[PATCH 06/14] loop: Split setting of lo_state from loop_clr_fd
Move setting of lo_state to Lo_rundown out into the callers. That will allow us to unlock loop_ctl_mutex while the loop device is protected from other changes by its special state. Signed-off-by: Jan Kara --- drivers/block/loop.c | 52 +++- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a86ef20c15e2..51d11898e170 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -976,7 +976,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, loop_reread_partitions(lo, bdev); /* Grab the block_device to prevent its destruction after we -* put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev). +* put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). */ bdgrab(bdev); return 0; @@ -1026,31 +1026,15 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, return err; } -static int loop_clr_fd(struct loop_device *lo) +static int __loop_clr_fd(struct loop_device *lo) { struct file *filp = lo->lo_backing_file; gfp_t gfp = lo->old_gfp_mask; struct block_device *bdev = lo->lo_device; - if (lo->lo_state != Lo_bound) + if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) return -ENXIO; - /* -* If we've explicitly asked to tear down the loop device, -* and it has an elevated reference count, set it for auto-teardown when -* the last reference goes away. This stops $!~#$@ udev from -* preventing teardown because it decided that it needs to run blkid on -* the loopback device whenever they appear. xfstests is notorious for -* failing tests because blkid via udev races with a losetup -* /do something like mkfs/losetup -d causing the losetup -d -* command to fail with EBUSY. -*/ - if (atomic_read(>lo_refcnt) > 1) { - lo->lo_flags |= LO_FLAGS_AUTOCLEAR; - mutex_unlock(_ctl_mutex); - return 0; - } - if (filp == NULL) return -EINVAL; @@ -1058,7 +1042,6 @@ static int loop_clr_fd(struct loop_device *lo) blk_mq_freeze_queue(lo->lo_queue); spin_lock_irq(>lo_lock); - lo->lo_state = Lo_rundown; lo->lo_backing_file = NULL; spin_unlock_irq(>lo_lock); @@ -,6 +1094,30 @@ static int loop_clr_fd(struct loop_device *lo) return 0; } +static int loop_clr_fd(struct loop_device *lo) +{ + if (lo->lo_state != Lo_bound) + return -ENXIO; + /* +* If we've explicitly asked to tear down the loop device, +* and it has an elevated reference count, set it for auto-teardown when +* the last reference goes away. This stops $!~#$@ udev from +* preventing teardown because it decided that it needs to run blkid on +* the loopback device whenever they appear. xfstests is notorious for +* failing tests because blkid via udev races with a losetup +* /do something like mkfs/losetup -d causing the losetup -d +* command to fail with EBUSY. +*/ + if (atomic_read(>lo_refcnt) > 1) { + lo->lo_flags |= LO_FLAGS_AUTOCLEAR; + mutex_unlock(_ctl_mutex); + return 0; + } + lo->lo_state = Lo_rundown; + + return __loop_clr_fd(lo); +} + static int loop_set_status(struct loop_device *lo, const struct loop_info64 *info) { @@ -1692,11 +1699,14 @@ static void lo_release(struct gendisk *disk, fmode_t mode) goto out_unlock; if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { + if (lo->lo_state != Lo_bound) + goto out_unlock; + lo->lo_state = Lo_rundown; /* * In autoclear mode, stop the loop thread * and remove configuration after last close. */ - err = loop_clr_fd(lo); + err = __loop_clr_fd(lo); if (!err) return; } else if (lo->lo_state == Lo_bound) { -- 2.16.4
[PATCH 02/14] block/loop: Use global lock for ioctl() operation.
From: Tetsuo Handa syzbot is reporting NULL pointer dereference [1] which is caused by race condition between ioctl(loop_fd, LOOP_CLR_FD, 0) versus ioctl(other_loop_fd, LOOP_SET_FD, loop_fd) due to traversing other loop devices at loop_validate_file() without holding corresponding lo->lo_ctl_mutex locks. Since ioctl() request on loop devices is not frequent operation, we don't need fine grained locking. Let's use global lock in order to allow safe traversal at loop_validate_file(). Note that syzbot is also reporting circular locking dependency between bdev->bd_mutex and lo->lo_ctl_mutex [2] which is caused by calling blkdev_reread_part() with lock held. This patch does not address it. [1] https://syzkaller.appspot.com/bug?id=f3cfe26e785d85f9ee259f385515291d21bd80a3 [2] https://syzkaller.appspot.com/bug?id=bf154052f0eea4bc7712499e4569505907d15889 Signed-off-by: Tetsuo Handa Reported-by: syzbot Reviewed-by: Jan Kara Signed-off-by: Jan Kara --- drivers/block/loop.c | 58 ++-- drivers/block/loop.h | 1 - 2 files changed, 29 insertions(+), 30 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 50c81ff44ae2..d0f1b7106572 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -84,6 +84,7 @@ static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_index_mutex); +static DEFINE_MUTEX(loop_ctl_mutex); static int max_part; static int part_shift; @@ -1047,7 +1048,7 @@ static int loop_clr_fd(struct loop_device *lo) */ if (atomic_read(>lo_refcnt) > 1) { lo->lo_flags |= LO_FLAGS_AUTOCLEAR; - mutex_unlock(>lo_ctl_mutex); + mutex_unlock(_ctl_mutex); return 0; } @@ -1100,12 +1101,12 @@ static int loop_clr_fd(struct loop_device *lo) if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; loop_unprepare_queue(lo); - mutex_unlock(>lo_ctl_mutex); + mutex_unlock(_ctl_mutex); /* -* Need not hold lo_ctl_mutex to fput backing file. -* Calling fput holding lo_ctl_mutex triggers a circular +* Need not hold loop_ctl_mutex to fput backing file. +* Calling fput holding loop_ctl_mutex triggers a circular * lock dependency possibility warning as fput can take -* bd_mutex which is usually taken before lo_ctl_mutex. +* bd_mutex which is usually taken before loop_ctl_mutex. */ fput(filp); return 0; @@ -1210,7 +1211,7 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) int ret; if (lo->lo_state != Lo_bound) { - mutex_unlock(>lo_ctl_mutex); + mutex_unlock(_ctl_mutex); return -ENXIO; } @@ -1229,10 +1230,10 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) lo->lo_encrypt_key_size); } - /* Drop lo_ctl_mutex while we call into the filesystem. */ + /* Drop loop_ctl_mutex while we call into the filesystem. */ path = lo->lo_backing_file->f_path; path_get(); - mutex_unlock(>lo_ctl_mutex); + mutex_unlock(_ctl_mutex); ret = vfs_getattr(, , STATX_INO, AT_STATX_SYNC_AS_STAT); if (!ret) { info->lo_device = huge_encode_dev(stat.dev); @@ -1324,7 +1325,7 @@ loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) { int err; if (!arg) { - mutex_unlock(>lo_ctl_mutex); + mutex_unlock(_ctl_mutex); return -EINVAL; } err = loop_get_status(lo, ); @@ -1342,7 +1343,7 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { int err; if (!arg) { - mutex_unlock(>lo_ctl_mutex); + mutex_unlock(_ctl_mutex); return -EINVAL; } err = loop_get_status(lo, ); @@ -1400,7 +1401,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, struct loop_device *lo = bdev->bd_disk->private_data; int err; - err = mutex_lock_killable_nested(>lo_ctl_mutex, 1); + err = mutex_lock_killable_nested(_ctl_mutex, 1); if (err) goto out_unlocked; @@ -1412,7 +1413,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, err = loop_change_fd(lo, bdev, arg); break; case LOOP_CLR_FD: - /* loop_clr_fd would have unlocked lo_ctl_mutex on success */ + /* loop_clr_fd would have unlocked loop_ctl_mutex on success */ err = loop_clr_fd(lo); if (!err) goto out_unlocked; @@ -1425,7 +1426,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, break; case LOOP_GET_STATUS:
[PATCH 01/14] block/loop: Don't grab "struct file" for vfs_getattr() operation.
From: Tetsuo Handa vfs_getattr() needs "struct path" rather than "struct file". Let's use path_get()/path_put() rather than get_file()/fput(). Signed-off-by: Tetsuo Handa Reviewed-by: Jan Kara Signed-off-by: Jan Kara --- drivers/block/loop.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ea9debf59b22..50c81ff44ae2 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1205,7 +1205,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) static int loop_get_status(struct loop_device *lo, struct loop_info64 *info) { - struct file *file; + struct path path; struct kstat stat; int ret; @@ -1230,16 +1230,16 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) } /* Drop lo_ctl_mutex while we call into the filesystem. */ - file = get_file(lo->lo_backing_file); + path = lo->lo_backing_file->f_path; + path_get(); mutex_unlock(>lo_ctl_mutex); - ret = vfs_getattr(>f_path, , STATX_INO, - AT_STATX_SYNC_AS_STAT); + ret = vfs_getattr(, , STATX_INO, AT_STATX_SYNC_AS_STAT); if (!ret) { info->lo_device = huge_encode_dev(stat.dev); info->lo_inode = stat.ino; info->lo_rdevice = huge_encode_dev(stat.rdev); } - fput(file); + path_put(); return ret; } -- 2.16.4
[PATCH 09/14] loop: Push loop_ctl_mutex down to loop_set_status()
Push loop_ctl_mutex down to loop_set_status(). We will need this to be able to call loop_reread_partitions() without loop_ctl_mutex. Signed-off-by: Jan Kara --- drivers/block/loop.c | 51 +-- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 577d5e5a9312..1cc29bd77d67 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1142,46 +1142,55 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) struct loop_func_table *xfer; kuid_t uid = current_uid(); + err = mutex_lock_killable_nested(_ctl_mutex, 1); + if (err) + return err; if (lo->lo_encrypt_key_size && !uid_eq(lo->lo_key_owner, uid) && - !capable(CAP_SYS_ADMIN)) - return -EPERM; - if (lo->lo_state != Lo_bound) - return -ENXIO; - if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) - return -EINVAL; + !capable(CAP_SYS_ADMIN)) { + err = -EPERM; + goto out_unlock; + } + if (lo->lo_state != Lo_bound) { + err = -ENXIO; + goto out_unlock; + } + if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) { + err = -EINVAL; + goto out_unlock; + } /* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); err = loop_release_xfer(lo); if (err) - goto exit; + goto out_unfreeze; if (info->lo_encrypt_type) { unsigned int type = info->lo_encrypt_type; if (type >= MAX_LO_CRYPT) { err = -EINVAL; - goto exit; + goto out_unfreeze; } xfer = xfer_funcs[type]; if (xfer == NULL) { err = -EINVAL; - goto exit; + goto out_unfreeze; } } else xfer = NULL; err = loop_init_xfer(lo, xfer, info); if (err) - goto exit; + goto out_unfreeze; if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; - goto exit; + goto out_unfreeze; } } @@ -1213,7 +1222,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) /* update dio if lo_offset or transfer is changed */ __loop_update_dio(lo, lo->use_dio); - exit: +out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue); if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) && @@ -1222,6 +1231,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; loop_reread_partitions(lo, lo->lo_device); } +out_unlock: + mutex_unlock(_ctl_mutex); return err; } @@ -1468,12 +1479,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, case LOOP_SET_STATUS: err = -EPERM; if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) { - err = mutex_lock_killable_nested(_ctl_mutex, 1); - if (err) - return err; err = loop_set_status_old(lo, (struct loop_info __user *)arg); - mutex_unlock(_ctl_mutex); } break; case LOOP_GET_STATUS: @@ -1481,12 +1488,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, case LOOP_SET_STATUS64: err = -EPERM; if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) { - err = mutex_lock_killable_nested(_ctl_mutex, 1); - if (err) - return err; err = loop_set_status64(lo, (struct loop_info64 __user *) arg); - mutex_unlock(_ctl_mutex); } break; case LOOP_GET_STATUS64: @@ -1631,12 +1634,8 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, switch(cmd) { case LOOP_SET_STATUS: - err = mutex_lock_killable(_ctl_mutex); - if (!err) { - err = loop_set_status_compat(lo, -(const struct compat_loop_info __user *)a
[PATCH 10/14] loop: Push loop_ctl_mutex down to loop_set_fd()
Push lo_ctl_mutex down to loop_set_fd(). We will need this to be able to call loop_reread_partitions() without lo_ctl_mutex. Signed-off-by: Jan Kara --- drivers/block/loop.c | 26 ++ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1cc29bd77d67..504e5ef07509 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -919,13 +919,17 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, if (!file) goto out; + error = mutex_lock_killable_nested(_ctl_mutex, 1); + if (error) + goto out_putf; + error = -EBUSY; if (lo->lo_state != Lo_unbound) - goto out_putf; + goto out_unlock; error = loop_validate_file(file, bdev); if (error) - goto out_putf; + goto out_unlock; mapping = file->f_mapping; inode = mapping->host; @@ -937,10 +941,10 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, error = -EFBIG; size = get_loop_size(lo, file); if ((loff_t)(sector_t)size != size) - goto out_putf; + goto out_unlock; error = loop_prepare_queue(lo); if (error) - goto out_putf; + goto out_unlock; error = 0; @@ -979,11 +983,14 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). */ bdgrab(bdev); + mutex_unlock(_ctl_mutex); return 0; - out_putf: +out_unlock: + mutex_unlock(_ctl_mutex); +out_putf: fput(file); - out: +out: /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return error; @@ -1461,12 +1468,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, switch (cmd) { case LOOP_SET_FD: - err = mutex_lock_killable_nested(_ctl_mutex, 1); - if (err) - return err; - err = loop_set_fd(lo, mode, bdev, arg); - mutex_unlock(_ctl_mutex); - break; + return loop_set_fd(lo, mode, bdev, arg); case LOOP_CHANGE_FD: err = mutex_lock_killable_nested(_ctl_mutex, 1); if (err) -- 2.16.4
[PATCH 12/14] loop: Move special partition reread handling in loop_clr_fd()
The call of __blkdev_reread_part() from loop_reread_partition() happens only when we need to invalidate partitions from loop_release(). Thus move a detection for this into loop_clr_fd() and simplify loop_reread_partition(). This makes loop_reread_partition() safe to use without loop_ctl_mutex because we use only lo->lo_number and lo->lo_file_name in case of error for reporting purposes (thus possibly reporting outdate information is not a big deal) and we are safe from 'lo' going away under us by elevated lo->lo_refcnt. Signed-off-by: Jan Kara --- drivers/block/loop.c | 33 +++-- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index b4ea862f14fd..cce1a32ae6d0 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -631,18 +631,7 @@ static void loop_reread_partitions(struct loop_device *lo, { int rc; - /* -* bd_mutex has been held already in release path, so don't -* acquire it if this function is called in such case. -* -* If the reread partition isn't from release path, lo_refcnt -* must be at least one and it can only become zero when the -* current holder is released. -*/ - if (!atomic_read(>lo_refcnt)) - rc = __blkdev_reread_part(bdev); - else - rc = blkdev_reread_part(bdev); + rc = blkdev_reread_part(bdev); if (rc) pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n", __func__, lo->lo_number, lo->lo_file_name, rc); @@ -1096,8 +1085,24 @@ static int __loop_clr_fd(struct loop_device *lo) module_put(THIS_MODULE); blk_mq_unfreeze_queue(lo->lo_queue); - if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) - loop_reread_partitions(lo, bdev); + if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) { + /* +* bd_mutex has been held already in release path, so don't +* acquire it if this function is called in such case. +* +* If the reread partition isn't from release path, lo_refcnt +* must be at least one and it can only become zero when the +* current holder is released. +*/ + if (!atomic_read(>lo_refcnt)) + err = __blkdev_reread_part(bdev); + else + err = blkdev_reread_part(bdev); + pr_warn("%s: partition scan of loop%d failed (rc=%d)\n", + __func__, lo->lo_number, err); + /* Device is gone, no point in returning error */ + err = 0; + } lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; -- 2.16.4
[PATCH 11/14] loop: Push loop_ctl_mutex down to loop_change_fd()
Push loop_ctl_mutex down to loop_change_fd(). We will need this to be able to call loop_reread_partitions() without loop_ctl_mutex. Signed-off-by: Jan Kara --- drivers/block/loop.c | 22 +++--- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 504e5ef07509..b4ea862f14fd 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -692,19 +692,22 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, struct file *file, *old_file; int error; + error = mutex_lock_killable_nested(_ctl_mutex, 1); + if (error) + return error; error = -ENXIO; if (lo->lo_state != Lo_bound) - goto out; + goto out_unlock; /* the loop device has to be read-only */ error = -EINVAL; if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) - goto out; + goto out_unlock; error = -EBADF; file = fget(arg); if (!file) - goto out; + goto out_unlock; error = loop_validate_file(file, bdev); if (error) @@ -731,11 +734,13 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, fput(old_file); if (lo->lo_flags & LO_FLAGS_PARTSCAN) loop_reread_partitions(lo, bdev); + mutex_unlock(_ctl_mutex); return 0; - out_putf: +out_putf: fput(file); - out: +out_unlock: + mutex_unlock(_ctl_mutex); return error; } @@ -1470,12 +1475,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, case LOOP_SET_FD: return loop_set_fd(lo, mode, bdev, arg); case LOOP_CHANGE_FD: - err = mutex_lock_killable_nested(_ctl_mutex, 1); - if (err) - return err; - err = loop_change_fd(lo, bdev, arg); - mutex_unlock(_ctl_mutex); - break; + return loop_change_fd(lo, bdev, arg); case LOOP_CLR_FD: return loop_clr_fd(lo); case LOOP_SET_STATUS: -- 2.16.4
[PATCH 05/14] loop: Push lo_ctl_mutex down into individual ioctls
Push acquisition of lo_ctl_mutex down into individual ioctl handling branches. This is a preparatory step for pushing the lock down into individual ioctl handling functions so that they can release the lock as they need it. We also factor out some simple ioctl handlers that will not need any special handling to reduce unnecessary code duplication. Signed-off-by: Jan Kara --- drivers/block/loop.c | 88 +--- 1 file changed, 63 insertions(+), 25 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e35707fb8318..a86ef20c15e2 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1394,70 +1394,108 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) return 0; } -static int lo_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) +static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd, + unsigned long arg) { - struct loop_device *lo = bdev->bd_disk->private_data; int err; err = mutex_lock_killable_nested(_ctl_mutex, 1); if (err) - goto out_unlocked; + return err; + switch (cmd) { + case LOOP_SET_CAPACITY: + err = loop_set_capacity(lo); + break; + case LOOP_SET_DIRECT_IO: + err = loop_set_dio(lo, arg); + break; + case LOOP_SET_BLOCK_SIZE: + err = loop_set_block_size(lo, arg); + break; + default: + err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; + } + mutex_unlock(_ctl_mutex); + return err; +} + +static int lo_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct loop_device *lo = bdev->bd_disk->private_data; + int err; switch (cmd) { case LOOP_SET_FD: + err = mutex_lock_killable_nested(_ctl_mutex, 1); + if (err) + return err; err = loop_set_fd(lo, mode, bdev, arg); + mutex_unlock(_ctl_mutex); break; case LOOP_CHANGE_FD: + err = mutex_lock_killable_nested(_ctl_mutex, 1); + if (err) + return err; err = loop_change_fd(lo, bdev, arg); + mutex_unlock(_ctl_mutex); break; case LOOP_CLR_FD: + err = mutex_lock_killable_nested(_ctl_mutex, 1); + if (err) + return err; /* loop_clr_fd would have unlocked loop_ctl_mutex on success */ err = loop_clr_fd(lo); - if (!err) - goto out_unlocked; + if (err) + mutex_unlock(_ctl_mutex); break; case LOOP_SET_STATUS: err = -EPERM; - if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) + if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) { + err = mutex_lock_killable_nested(_ctl_mutex, 1); + if (err) + return err; err = loop_set_status_old(lo, (struct loop_info __user *)arg); + mutex_unlock(_ctl_mutex); + } break; case LOOP_GET_STATUS: + err = mutex_lock_killable_nested(_ctl_mutex, 1); + if (err) + return err; err = loop_get_status_old(lo, (struct loop_info __user *) arg); /* loop_get_status() unlocks loop_ctl_mutex */ - goto out_unlocked; + break; case LOOP_SET_STATUS64: err = -EPERM; - if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) + if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) { + err = mutex_lock_killable_nested(_ctl_mutex, 1); + if (err) + return err; err = loop_set_status64(lo, (struct loop_info64 __user *) arg); + mutex_unlock(_ctl_mutex); + } break; case LOOP_GET_STATUS64: + err = mutex_lock_killable_nested(_ctl_mutex, 1); + if (err) + return err; err = loop_get_status64(lo, (struct loop_info64 __user *) arg); /* loop_get_status() unlocks loop_ctl_mutex */ - goto out_unlocked; - case LOOP_SET_CAPACITY: - err = -EPERM; - if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) - err = loop_set_capacity(lo); break; +
[PATCH 0/14] loop: Fix oops and possible deadlocks
Hi, this patch series fixes oops and possible deadlocks as reported by syzbot [1] [2]. The second patch in the series (from Tetsuo) fixes the oops, the remaining patches are cleaning up the locking in the loop driver so that we can in the end reasonably easily switch to rereading partitions without holding mutex protecting the loop device. I have lightly tested the patches by creating, deleting, and modifying loop devices but if there's some more comprehensive loopback device testsuite, I can try running it. Review is welcome! Honza [1] https://syzkaller.appspot.com/bug?id=f3cfe26e785d85f9ee259f385515291d21bd80a3 [2] https://syzkaller.appspot.com/bug?id=bf154052f0eea4bc7712499e4569505907d15889
[PATCH 07/14] loop: Push loop_ctl_mutex down into loop_clr_fd()
loop_clr_fd() has a weird locking convention that is expects loop_ctl_mutex held, releases it on success and keeps it on failure. Untangle the mess by moving locking of loop_ctl_mutex into loop_clr_fd(). Signed-off-by: Jan Kara --- drivers/block/loop.c | 49 + 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 51d11898e170..e4b82ca49286 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1028,15 +1028,22 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, static int __loop_clr_fd(struct loop_device *lo) { - struct file *filp = lo->lo_backing_file; + struct file *filp = NULL; gfp_t gfp = lo->old_gfp_mask; struct block_device *bdev = lo->lo_device; + int err = 0; - if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) - return -ENXIO; + mutex_lock(_ctl_mutex); + if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { + err = -ENXIO; + goto out_unlock; + } - if (filp == NULL) - return -EINVAL; + filp = lo->lo_backing_file; + if (filp == NULL) { + err = -EINVAL; + goto out_unlock; + } /* freeze request queue during the transition */ blk_mq_freeze_queue(lo->lo_queue); @@ -1083,6 +1090,7 @@ static int __loop_clr_fd(struct loop_device *lo) if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; loop_unprepare_queue(lo); +out_unlock: mutex_unlock(_ctl_mutex); /* * Need not hold loop_ctl_mutex to fput backing file. @@ -1090,14 +1098,22 @@ static int __loop_clr_fd(struct loop_device *lo) * lock dependency possibility warning as fput can take * bd_mutex which is usually taken before loop_ctl_mutex. */ - fput(filp); - return 0; + if (filp) + fput(filp); + return err; } static int loop_clr_fd(struct loop_device *lo) { - if (lo->lo_state != Lo_bound) + int err; + + err = mutex_lock_killable_nested(_ctl_mutex, 1); + if (err) + return err; + if (lo->lo_state != Lo_bound) { + mutex_unlock(_ctl_mutex); return -ENXIO; + } /* * If we've explicitly asked to tear down the loop device, * and it has an elevated reference count, set it for auto-teardown when @@ -1114,6 +1130,7 @@ static int loop_clr_fd(struct loop_device *lo) return 0; } lo->lo_state = Lo_rundown; + mutex_unlock(_ctl_mutex); return __loop_clr_fd(lo); } @@ -1448,14 +1465,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, mutex_unlock(_ctl_mutex); break; case LOOP_CLR_FD: - err = mutex_lock_killable_nested(_ctl_mutex, 1); - if (err) - return err; - /* loop_clr_fd would have unlocked loop_ctl_mutex on success */ - err = loop_clr_fd(lo); - if (err) - mutex_unlock(_ctl_mutex); - break; + return loop_clr_fd(lo); case LOOP_SET_STATUS: err = -EPERM; if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) { @@ -1691,7 +1701,6 @@ static int lo_open(struct block_device *bdev, fmode_t mode) static void lo_release(struct gendisk *disk, fmode_t mode) { struct loop_device *lo; - int err; mutex_lock(_ctl_mutex); lo = disk->private_data; @@ -1702,13 +1711,13 @@ static void lo_release(struct gendisk *disk, fmode_t mode) if (lo->lo_state != Lo_bound) goto out_unlock; lo->lo_state = Lo_rundown; + mutex_unlock(_ctl_mutex); /* * In autoclear mode, stop the loop thread * and remove configuration after last close. */ - err = __loop_clr_fd(lo); - if (!err) - return; + __loop_clr_fd(lo); + return; } else if (lo->lo_state == Lo_bound) { /* * Otherwise keep thread (if running) and config, -- 2.16.4
[PATCH 04/14] loop: Get rid of loop_index_mutex
Now that loop_ctl_mutex is global, just get rid of loop_index_mutex as there is no good reason to keep these two separate and it just complicates the locking. Signed-off-by: Jan Kara --- drivers/block/loop.c | 38 ++ 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index cc43d835fe6f..e35707fb8318 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -83,7 +83,6 @@ #include static DEFINE_IDR(loop_index_idr); -static DEFINE_MUTEX(loop_index_mutex); static DEFINE_MUTEX(loop_ctl_mutex); static int max_part; @@ -1627,9 +1626,11 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, static int lo_open(struct block_device *bdev, fmode_t mode) { struct loop_device *lo; - int err = 0; + int err; - mutex_lock(_index_mutex); + err = mutex_lock_killable(_ctl_mutex); + if (err) + return err; lo = bdev->bd_disk->private_data; if (!lo) { err = -ENXIO; @@ -1638,7 +1639,7 @@ static int lo_open(struct block_device *bdev, fmode_t mode) atomic_inc(>lo_refcnt); out: - mutex_unlock(_index_mutex); + mutex_unlock(_ctl_mutex); return err; } @@ -1647,12 +1648,11 @@ static void lo_release(struct gendisk *disk, fmode_t mode) struct loop_device *lo; int err; - mutex_lock(_index_mutex); + mutex_lock(_ctl_mutex); lo = disk->private_data; if (atomic_dec_return(>lo_refcnt)) - goto unlock_index; + goto out_unlock; - mutex_lock(_ctl_mutex); if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { /* * In autoclear mode, stop the loop thread @@ -1660,7 +1660,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode) */ err = loop_clr_fd(lo); if (!err) - goto unlock_index; + return; } else if (lo->lo_state == Lo_bound) { /* * Otherwise keep thread (if running) and config, @@ -1670,9 +1670,8 @@ static void lo_release(struct gendisk *disk, fmode_t mode) blk_mq_unfreeze_queue(lo->lo_queue); } +out_unlock: mutex_unlock(_ctl_mutex); -unlock_index: - mutex_unlock(_index_mutex); } static const struct block_device_operations lo_fops = { @@ -1973,7 +1972,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) struct kobject *kobj; int err; - mutex_lock(_index_mutex); + mutex_lock(_ctl_mutex); err = loop_lookup(, MINOR(dev) >> part_shift); if (err < 0) err = loop_add(, MINOR(dev) >> part_shift); @@ -1981,7 +1980,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) kobj = NULL; else kobj = get_disk_and_module(lo->lo_disk); - mutex_unlock(_index_mutex); + mutex_unlock(_ctl_mutex); *part = 0; return kobj; @@ -1993,7 +1992,10 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, struct loop_device *lo; int ret = -ENOSYS; - mutex_lock(_index_mutex); + ret = mutex_lock_killable(_ctl_mutex); + if (ret) + return ret; + switch (cmd) { case LOOP_CTL_ADD: ret = loop_lookup(, parm); @@ -2007,9 +2009,6 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, ret = loop_lookup(, parm); if (ret < 0) break; - ret = mutex_lock_killable(_ctl_mutex); - if (ret) - break; if (lo->lo_state != Lo_unbound) { ret = -EBUSY; mutex_unlock(_ctl_mutex); @@ -2021,7 +2020,6 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, break; } lo->lo_disk->private_data = NULL; - mutex_unlock(_ctl_mutex); idr_remove(_index_idr, lo->lo_number); loop_remove(lo); break; @@ -2031,7 +2029,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, break; ret = loop_add(, -1); } - mutex_unlock(_index_mutex); + mutex_unlock(_ctl_mutex); return ret; } @@ -2115,10 +2113,10 @@ static int __init loop_init(void) THIS_MODULE, loop_probe, NULL, NULL); /* pre-create number of devices given by config or max_loop */ - mutex_lock(_index_mutex); + mutex_lock(_ctl_mutex); for (i = 0; i < nr; i++) loop_add(, i); - mutex_unlock(_index_mutex); + mutex_unloc
[PATCH 13/14] loop: Move loop_reread_partitions() out of loop_ctl_mutex
Calling loop_reread_partitions() under loop_ctl_mutex causes lockdep to complain about circular lock dependency between bdev->bd_mutex and lo->lo_ctl_mutex. The problem is that on loop device open or close lo_open() and lo_release() get called with bdev->bd_mutex held and they need to acquire loop_ctl_mutex. OTOH when loop_reread_partitions() is called with loop_ctl_mutex held, it will call blkdev_reread_part() which acquires bdev->bd_mutex. See syzbot report for details [1]. Move all calls of loop_rescan_partitions() out of loop_ctl_mutex to avoid lockdep warning and fix deadlock possibility. [1] https://syzkaller.appspot.com/bug?id=bf154052f0eea4bc7712499e4569505907d1588 Reported-by: syzbot Signed-off-by: Jan Kara --- drivers/block/loop.c | 19 ++- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index cce1a32ae6d0..d2be85c48f03 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -680,6 +680,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, { struct file *file, *old_file; int error; + boolpartscan; error = mutex_lock_killable_nested(_ctl_mutex, 1); if (error) @@ -721,9 +722,10 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, blk_mq_unfreeze_queue(lo->lo_queue); fput(old_file); - if (lo->lo_flags & LO_FLAGS_PARTSCAN) - loop_reread_partitions(lo, bdev); + partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; mutex_unlock(_ctl_mutex); + if (partscan) + loop_reread_partitions(lo, bdev); return 0; out_putf: @@ -904,6 +906,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, int lo_flags = 0; int error; loff_t size; + boolpartscan; /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); @@ -970,14 +973,15 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->lo_state = Lo_bound; if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; - if (lo->lo_flags & LO_FLAGS_PARTSCAN) - loop_reread_partitions(lo, bdev); + partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; /* Grab the block_device to prevent its destruction after we * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). */ bdgrab(bdev); mutex_unlock(_ctl_mutex); + if (partscan) + loop_reread_partitions(lo, bdev); return 0; out_unlock: @@ -1158,6 +1162,8 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) int err; struct loop_func_table *xfer; kuid_t uid = current_uid(); + struct block_device *bdev; + bool partscan = false; err = mutex_lock_killable_nested(_ctl_mutex, 1); if (err) @@ -1246,10 +1252,13 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { lo->lo_flags |= LO_FLAGS_PARTSCAN; lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; - loop_reread_partitions(lo, lo->lo_device); + bdev = lo->lo_device; + partscan = true; } out_unlock: mutex_unlock(_ctl_mutex); + if (partscan) + loop_reread_partitions(lo, bdev); return err; } -- 2.16.4
[PATCH 03/14] loop: Fold __loop_release into loop_release
__loop_release() has a single call site. Fold it there. This is currently not a huge win but it will make following replacement of loop_index_mutex more obvious. Signed-off-by: Jan Kara --- drivers/block/loop.c | 16 +++- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d0f1b7106572..cc43d835fe6f 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1642,12 +1642,15 @@ static int lo_open(struct block_device *bdev, fmode_t mode) return err; } -static void __lo_release(struct loop_device *lo) +static void lo_release(struct gendisk *disk, fmode_t mode) { + struct loop_device *lo; int err; + mutex_lock(_index_mutex); + lo = disk->private_data; if (atomic_dec_return(>lo_refcnt)) - return; + goto unlock_index; mutex_lock(_ctl_mutex); if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { @@ -1657,7 +1660,7 @@ static void __lo_release(struct loop_device *lo) */ err = loop_clr_fd(lo); if (!err) - return; + goto unlock_index; } else if (lo->lo_state == Lo_bound) { /* * Otherwise keep thread (if running) and config, @@ -1668,12 +1671,7 @@ static void __lo_release(struct loop_device *lo) } mutex_unlock(_ctl_mutex); -} - -static void lo_release(struct gendisk *disk, fmode_t mode) -{ - mutex_lock(_index_mutex); - __lo_release(disk->private_data); +unlock_index: mutex_unlock(_index_mutex); } -- 2.16.4
[PATCH 14/14] loop: Fix deadlock when calling blkdev_reread_part()
Calling blkdev_reread_part() under loop_ctl_mutex causes lockdep to complain about circular lock dependency between bdev->bd_mutex and lo->lo_ctl_mutex. The problem is that on loop device open or close lo_open() and lo_release() get called with bdev->bd_mutex held and they need to acquire loop_ctl_mutex. OTOH when loop_reread_partitions() is called with loop_ctl_mutex held, it will call blkdev_reread_part() which acquires bdev->bd_mutex. See syzbot report for details [1]. Move call to blkdev_reread_part() in __loop_clr_fd() from under loop_ctl_mutex to finish fixing of the lockdep warning and the possible deadlock. [1] https://syzkaller.appspot.com/bug?id=bf154052f0eea4bc7712499e4569505907d1588 Reported-by: syzbot Signed-off-by: Jan Kara --- drivers/block/loop.c | 28 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d2be85c48f03..a0fb7bf62b29 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1031,12 +1031,14 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, return err; } -static int __loop_clr_fd(struct loop_device *lo) +static int __loop_clr_fd(struct loop_device *lo, bool release) { struct file *filp = NULL; gfp_t gfp = lo->old_gfp_mask; struct block_device *bdev = lo->lo_device; int err = 0; + bool partscan = false; + int lo_number; mutex_lock(_ctl_mutex); if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { @@ -1089,7 +1091,15 @@ static int __loop_clr_fd(struct loop_device *lo) module_put(THIS_MODULE); blk_mq_unfreeze_queue(lo->lo_queue); - if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) { + partscan = lo->lo_flags & LO_FLAGS_PARTSCAN && bdev; + lo_number = lo->lo_number; + lo->lo_flags = 0; + if (!part_shift) + lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; + loop_unprepare_queue(lo); +out_unlock: + mutex_unlock(_ctl_mutex); + if (partscan) { /* * bd_mutex has been held already in release path, so don't * acquire it if this function is called in such case. @@ -1098,21 +1108,15 @@ static int __loop_clr_fd(struct loop_device *lo) * must be at least one and it can only become zero when the * current holder is released. */ - if (!atomic_read(>lo_refcnt)) + if (release) err = __blkdev_reread_part(bdev); else err = blkdev_reread_part(bdev); pr_warn("%s: partition scan of loop%d failed (rc=%d)\n", - __func__, lo->lo_number, err); + __func__, lo_number, err); /* Device is gone, no point in returning error */ err = 0; } - lo->lo_flags = 0; - if (!part_shift) - lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; - loop_unprepare_queue(lo); -out_unlock: - mutex_unlock(_ctl_mutex); /* * Need not hold loop_ctl_mutex to fput backing file. * Calling fput holding loop_ctl_mutex triggers a circular @@ -1153,7 +1157,7 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_state = Lo_rundown; mutex_unlock(_ctl_mutex); - return __loop_clr_fd(lo); + return __loop_clr_fd(lo, false); } static int @@ -1714,7 +1718,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode) * In autoclear mode, stop the loop thread * and remove configuration after last close. */ - __loop_clr_fd(lo); + __loop_clr_fd(lo, true); return; } else if (lo->lo_state == Lo_bound) { /* -- 2.16.4
[PATCH 08/14] loop: Push loop_ctl_mutex down to loop_get_status()
Push loop_ctl_mutex down to loop_get_status() to avoid the unusual convention that the function gets called with loop_ctl_mutex held and releases it. Signed-off-by: Jan Kara --- drivers/block/loop.c | 37 ++--- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e4b82ca49286..577d5e5a9312 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1233,6 +1233,9 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) struct kstat stat; int ret; + ret = mutex_lock_killable_nested(_ctl_mutex, 1); + if (ret) + return ret; if (lo->lo_state != Lo_bound) { mutex_unlock(_ctl_mutex); return -ENXIO; @@ -1347,10 +1350,8 @@ loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) { struct loop_info64 info64; int err; - if (!arg) { - mutex_unlock(_ctl_mutex); + if (!arg) return -EINVAL; - } err = loop_get_status(lo, ); if (!err) err = loop_info64_to_old(, ); @@ -1365,10 +1366,8 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { struct loop_info64 info64; int err; - if (!arg) { - mutex_unlock(_ctl_mutex); + if (!arg) return -EINVAL; - } err = loop_get_status(lo, ); if (!err && copy_to_user(arg, , sizeof(info64))) err = -EFAULT; @@ -1478,12 +1477,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, } break; case LOOP_GET_STATUS: - err = mutex_lock_killable_nested(_ctl_mutex, 1); - if (err) - return err; - err = loop_get_status_old(lo, (struct loop_info __user *) arg); - /* loop_get_status() unlocks loop_ctl_mutex */ - break; + return loop_get_status_old(lo, (struct loop_info __user *) arg); case LOOP_SET_STATUS64: err = -EPERM; if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) { @@ -1496,12 +1490,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, } break; case LOOP_GET_STATUS64: - err = mutex_lock_killable_nested(_ctl_mutex, 1); - if (err) - return err; - err = loop_get_status64(lo, (struct loop_info64 __user *) arg); - /* loop_get_status() unlocks loop_ctl_mutex */ - break; + return loop_get_status64(lo, (struct loop_info64 __user *) arg); case LOOP_SET_CAPACITY: case LOOP_SET_DIRECT_IO: case LOOP_SET_BLOCK_SIZE: @@ -1626,10 +1615,8 @@ loop_get_status_compat(struct loop_device *lo, struct loop_info64 info64; int err; - if (!arg) { - mutex_unlock(_ctl_mutex); + if (!arg) return -EINVAL; - } err = loop_get_status(lo, ); if (!err) err = loop_info64_to_compat(, arg); @@ -1652,12 +1639,8 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, } break; case LOOP_GET_STATUS: - err = mutex_lock_killable(_ctl_mutex); - if (!err) { - err = loop_get_status_compat(lo, -(struct compat_loop_info __user *)arg); - /* loop_get_status() unlocks loop_ctl_mutex */ - } + err = loop_get_status_compat(lo, +(struct compat_loop_info __user *)arg); break; case LOOP_SET_CAPACITY: case LOOP_CLR_FD: -- 2.16.4
Re: [PATCH 4/4] block/loop: Fix circular locking dependency at blkdev_reread_part().
On Thu 27-09-18 20:35:27, Tetsuo Handa wrote: > On 2018/09/27 20:27, Jan Kara wrote: > > Hi, > > > > On Wed 26-09-18 00:26:49, Tetsuo Handa wrote: > >> syzbot is reporting circular locking dependency between bdev->bd_mutex > >> and lo->lo_ctl_mutex [1] which is caused by calling blkdev_reread_part() > >> with lock held. We need to drop lo->lo_ctl_mutex in order to fix it. > >> > >> This patch fixes it by combining loop_index_mutex and loop_ctl_mutex into > >> loop_mutex, and releasing loop_mutex before calling blkdev_reread_part() > >> or fput() or path_put() or leaving ioctl(). > >> > >> The rule is that current thread calls lock_loop() before accessing > >> "struct loop_device", and current thread no longer accesses "struct > >> loop_device" after unlock_loop() is called. > >> > >> Since syzbot is reporting various bugs [2] where a race in the loop module > >> is suspected, let's check whether this patch affects these bugs too. > >> > >> [1] > >> https://syzkaller.appspot.com/bug?id=bf154052f0eea4bc7712499e4569505907d15889 > >> [2] > >> https://syzkaller.appspot.com/bug?id=b3c7e1440aa8ece16bf557dbac427fdff1dad9d6 > >> > >> Signed-off-by: Tetsuo Handa > >> Reported-by: syzbot > >> > >> --- > >> drivers/block/loop.c | 187 > >> --- > >> 1 file changed, 101 insertions(+), 86 deletions(-) > > > > I still don't like this patch. I'll post a patch series showing what I have > > in mind. Admittedly, it's a bit tedious but the locking is much saner > > afterwards... > > Please be sure to Cc: me. I'm not subscribed to linux-block ML. Yes, I've CCed you. > But if we have to release lo_ctl_mutex before calling blkdev_reread_part(), > what is nice with re-acquiring lo_ctl_mutex after blkdev_reread_part() ? We don't reacquire the mutex after blkdev_reread_part(). Just the code needed to be cleaned up so that loop_reread_part() does not need lo_ctl_mutex for anything. Honza -- Jan Kara SUSE Labs, CR
Re: [PATCH 8/8] aio: support for IO polling
On Tue 20-11-18 10:19:53, Jens Axboe wrote: > +/* > + * We can't just wait for polled events to come to us, we have to actively > + * find and complete them. > + */ > +static void aio_iopoll_reap_events(struct kioctx *ctx) > +{ > + if (!(ctx->flags & IOCTX_FLAG_IOPOLL)) > + return; > + > + while (!list_empty_careful(>poll_submitted) || > +!list_empty(>poll_completing)) { > + unsigned int nr_events = 0; > + > + __aio_iopoll_check(ctx, NULL, _events, 1, UINT_MAX); > + } > +} > + > +static int aio_iopoll_check(struct kioctx *ctx, long min_nr, long nr, > + struct io_event __user *event) > +{ > + unsigned int nr_events = 0; > + int ret = 0; > + > + /* * Only allow one thread polling at a time */ > + if (test_and_set_bit(0, >getevents_busy)) > + return -EBUSY; > + > + while (!nr_events || !need_resched()) { > + int tmin = 0; > + > + if (nr_events < min_nr) > + tmin = min_nr - nr_events; > + > + ret = __aio_iopoll_check(ctx, event, _events, tmin, nr); > + if (ret <= 0) > + break; > + ret = 0; > + } > + > + clear_bit(0, >getevents_busy); > + return nr_events ? nr_events : ret; > +} Hum, what if userspace calls io_destroy() while another process is polling for events on the same kioctx? It seems we'd be reaping events from two processes in parallel in that case which will result in various "interesting" effects like ctx->poll_completing list corruption... Honza -- Jan Kara SUSE Labs, CR