Hi,

On 04.08.2021 11:41, Christoph Hellwig wrote:
> device mapper needs to register holders before it is ready to do I/O.
> Currently it does so by registering the disk early, which can leave
> the disk and queue in a weird half state where the queue is registered
> with the disk, except for sysfs and the elevator.  And this state has
> been a bit promlematic before, and will get more so when sorting out
> the responsibilities between the queue and the disk.
>
> Support registering holders on an initialized but not registered disk
> instead by delaying the sysfs registration until the disk is registered.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> Reviewed-by: Mike Snitzer <[email protected]>

This patch landed in today's linux-next (20210810) as commit 
d62633873590 ("block: support delayed holder registration"). It triggers 
a following lockdep warning on ARM64's virt 'machine' on QEmu:

======================================================
WARNING: possible circular locking dependency detected
5.14.0-rc4+ #10642 Not tainted
------------------------------------------------------
systemd-udevd/227 is trying to acquire lock:
ffffb6b41952d628 (mtd_table_mutex){+.+.}-{3:3}, at: blktrans_open+0x40/0x250

but task is already holding lock:
ffff0eacc403bb18 (&disk->open_mutex){+.+.}-{3:3}, at: 
blkdev_get_by_dev+0x110/0x2f8

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #1 (&disk->open_mutex){+.+.}-{3:3}:
        __mutex_lock+0xa4/0x978
        mutex_lock_nested+0x54/0x60
        bd_register_pending_holders+0x2c/0x118
        __device_add_disk+0x1d8/0x368
        device_add_disk+0x10/0x18
        add_mtd_blktrans_dev+0x2dc/0x428
        mtdblock_add_mtd+0x68/0x98
        blktrans_notify_add+0x44/0x70
        add_mtd_device+0x430/0x4a0
        mtd_device_parse_register+0x1a4/0x2b0
        physmap_flash_probe+0x44c/0x780
        platform_probe+0x90/0xd8
        really_probe+0x138/0x2d0
        __driver_probe_device+0x78/0xd8
        driver_probe_device+0x40/0x110
        __driver_attach+0xcc/0x118
        bus_for_each_dev+0x68/0xc8
        driver_attach+0x20/0x28
        bus_add_driver+0x168/0x1f8
        driver_register+0x60/0x110
        __platform_driver_register+0x24/0x30
        physmap_init+0x18/0x20
        do_one_initcall+0x84/0x450
        kernel_init_freeable+0x31c/0x38c
        kernel_init+0x20/0x120
        ret_from_fork+0x10/0x18

-> #0 (mtd_table_mutex){+.+.}-{3:3}:
        __lock_acquire+0xff4/0x1840
        lock_acquire+0x130/0x3e8
        __mutex_lock+0xa4/0x978
        mutex_lock_nested+0x54/0x60
        blktrans_open+0x40/0x250
        blkdev_get_whole+0x28/0x120
        blkdev_get_by_dev+0x15c/0x2f8
        blkdev_open+0x50/0xb0
        do_dentry_open+0x238/0x3c0
        vfs_open+0x28/0x30
        path_openat+0x720/0x938
        do_filp_open+0x80/0x108
        do_sys_openat2+0x1b4/0x2c8
        do_sys_open+0x68/0x88
        __arm64_compat_sys_openat+0x1c/0x28
        invoke_syscall+0x40/0xf8
        el0_svc_common+0x60/0x100
        do_el0_svc_compat+0x1c/0x48
        el0_svc_compat+0x20/0x30
        el0t_32_sync_handler+0xec/0x140
        el0t_32_sync+0x168/0x16c

other info that might help us debug this:

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(&disk->open_mutex);
                                lock(mtd_table_mutex);
                                lock(&disk->open_mutex);
   lock(mtd_table_mutex);

  *** DEADLOCK ***

1 lock held by systemd-udevd/227:
  #0: ffff0eacc403bb18 (&disk->open_mutex){+.+.}-{3:3}, at: 
blkdev_get_by_dev+0x110/0x2f8

stack backtrace:
CPU: 1 PID: 227 Comm: systemd-udevd Not tainted 5.14.0-rc4+ #10642
Hardware name: linux,dummy-virt (DT)
Call trace:
  dump_backtrace+0x0/0x1d0
  show_stack+0x14/0x20
  dump_stack_lvl+0x88/0xb0
  dump_stack+0x14/0x2c
  print_circular_bug.isra.50+0x1ac/0x200
  check_noncircular+0x134/0x148
  __lock_acquire+0xff4/0x1840
  lock_acquire+0x130/0x3e8
  __mutex_lock+0xa4/0x978
  mutex_lock_nested+0x54/0x60
  blktrans_open+0x40/0x250
  blkdev_get_whole+0x28/0x120
  blkdev_get_by_dev+0x15c/0x2f8
  blkdev_open+0x50/0xb0
  do_dentry_open+0x238/0x3c0
  vfs_open+0x28/0x30
  path_openat+0x720/0x938
  do_filp_open+0x80/0x108
  do_sys_openat2+0x1b4/0x2c8
  do_sys_open+0x68/0x88
  __arm64_compat_sys_openat+0x1c/0x28
  invoke_syscall+0x40/0xf8
  el0_svc_common+0x60/0x100
  do_el0_svc_compat+0x1c/0x48
  el0_svc_compat+0x20/0x30
  el0t_32_sync_handler+0xec/0x140
  el0t_32_sync+0x168/0x16c

If this is a false positive, then it should be annotated as such.

> ---
>   block/genhd.c         | 10 +++++++
>   block/holder.c        | 68 ++++++++++++++++++++++++++++++++-----------
>   include/linux/genhd.h |  5 ++++
>   3 files changed, 66 insertions(+), 17 deletions(-)
>
> diff --git a/block/genhd.c b/block/genhd.c
> index cd4eab744667..db916f779077 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -447,6 +447,16 @@ static void register_disk(struct device *parent, struct 
> gendisk *disk,
>               kobject_create_and_add("holders", &ddev->kobj);
>       disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
>   
> +     /*
> +      * XXX: this is a mess, can't wait for real error handling in add_disk.
> +      * Make sure ->slave_dir is NULL if we failed some of the registration
> +      * so that the cleanup in bd_unlink_disk_holder works properly.
> +      */
> +     if (bd_register_pending_holders(disk) < 0) {
> +             kobject_put(disk->slave_dir);
> +             disk->slave_dir = NULL;
> +     }
> +
>       if (disk->flags & GENHD_FL_HIDDEN)
>               return;
>   
> diff --git a/block/holder.c b/block/holder.c
> index 11e65d99a9fb..4568cc4f6827 100644
> --- a/block/holder.c
> +++ b/block/holder.c
> @@ -28,6 +28,19 @@ static void del_symlink(struct kobject *from, struct 
> kobject *to)
>       sysfs_remove_link(from, kobject_name(to));
>   }
>   
> +static int __link_disk_holder(struct block_device *bdev, struct gendisk 
> *disk)
> +{
> +     int ret;
> +
> +     ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
> +     if (ret)
> +             return ret;
> +     ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
> +     if (ret)
> +             del_symlink(disk->slave_dir, bdev_kobj(bdev));
> +     return ret;
> +}
> +
>   /**
>    * bd_link_disk_holder - create symlinks between holding disk and slave bdev
>    * @bdev: the claimed slave bdev
> @@ -66,7 +79,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct 
> gendisk *disk)
>       WARN_ON_ONCE(!bdev->bd_holder);
>   
>       /* FIXME: remove the following once add_disk() handles errors */
> -     if (WARN_ON(!disk->slave_dir || !bdev->bd_holder_dir))
> +     if (WARN_ON(!bdev->bd_holder_dir))
>               goto out_unlock;
>   
>       holder = bd_find_holder_disk(bdev, disk);
> @@ -84,28 +97,28 @@ int bd_link_disk_holder(struct block_device *bdev, struct 
> gendisk *disk)
>       INIT_LIST_HEAD(&holder->list);
>       holder->bdev = bdev;
>       holder->refcnt = 1;
> -
> -     ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
> -     if (ret)
> -             goto out_free;
> -
> -     ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
> -     if (ret)
> -             goto out_del;
> +     if (disk->slave_dir) {
> +             ret = __link_disk_holder(bdev, disk);
> +             if (ret) {
> +                     kfree(holder);
> +                     goto out_unlock;
> +             }
> +     }
>   
>       list_add(&holder->list, &disk->slave_bdevs);
> -     goto out_unlock;
> -
> -out_del:
> -     del_symlink(disk->slave_dir, bdev_kobj(bdev));
> -out_free:
> -     kfree(holder);
>   out_unlock:
>       mutex_unlock(&disk->open_mutex);
>       return ret;
>   }
>   EXPORT_SYMBOL_GPL(bd_link_disk_holder);
>   
> +static void __unlink_disk_holder(struct block_device *bdev,
> +             struct gendisk *disk)
> +{
> +     del_symlink(disk->slave_dir, bdev_kobj(bdev));
> +     del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
> +}
> +
>   /**
>    * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
>    * @bdev: the calimed slave bdev
> @@ -123,11 +136,32 @@ void bd_unlink_disk_holder(struct block_device *bdev, 
> struct gendisk *disk)
>       mutex_lock(&disk->open_mutex);
>       holder = bd_find_holder_disk(bdev, disk);
>       if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
> -             del_symlink(disk->slave_dir, bdev_kobj(bdev));
> -             del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
> +             if (disk->slave_dir)
> +                     __unlink_disk_holder(bdev, disk);
>               list_del_init(&holder->list);
>               kfree(holder);
>       }
>       mutex_unlock(&disk->open_mutex);
>   }
>   EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
> +
> +int bd_register_pending_holders(struct gendisk *disk)
> +{
> +     struct bd_holder_disk *holder;
> +     int ret;
> +
> +     mutex_lock(&disk->open_mutex);
> +     list_for_each_entry(holder, &disk->slave_bdevs, list) {
> +             ret = __link_disk_holder(holder->bdev, disk);
> +             if (ret)
> +                     goto out_undo;
> +     }
> +     mutex_unlock(&disk->open_mutex);
> +     return 0;
> +
> +out_undo:
> +     list_for_each_entry_continue_reverse(holder, &disk->slave_bdevs, list)
> +             __unlink_disk_holder(holder->bdev, disk);
> +     mutex_unlock(&disk->open_mutex);
> +     return ret;
> +}
> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index 0721807d76ee..80952f038d79 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -323,6 +323,7 @@ long compat_blkdev_ioctl(struct file *, unsigned, 
> unsigned long);
>   #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
>   int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
>   void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
> +int bd_register_pending_holders(struct gendisk *disk);
>   #else
>   static inline int bd_link_disk_holder(struct block_device *bdev,
>                                     struct gendisk *disk)
> @@ -333,6 +334,10 @@ static inline void bd_unlink_disk_holder(struct 
> block_device *bdev,
>                                        struct gendisk *disk)
>   {
>   }
> +static inline int bd_register_pending_holders(struct gendisk *disk)
> +{
> +     return 0;
> +}
>   #endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */
>   
>   dev_t part_devt(struct gendisk *disk, u8 partno);

Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland


--
dm-devel mailing list
[email protected]
https://listman.redhat.com/mailman/listinfo/dm-devel

Reply via email to