scsi_device_purge_requests() can be called from I/O code (an IOThread) by the virtio-scsi TMF emulation code. It therefore must not call Global State APIs such as blk_drain(), because doing so triggers an assertion failure.
blk_drain() is a Global State API because it uses bdrv_unref(). The ref/unref pair is actually unnecessary in device emulation code, because the drive= qdev property already holds the reference.

Introduce blk_drain_noref(), a variant of blk_drain() that is IO_OR_GS_CODE() and doesn't take a reference to the BlockBackend.

This fixes the following virtio-scsi IOThread assertion failure when hot-plugging scsi-hd devices:

  qemu-kvm: ../block/block-backend.c:1780: void blk_drain(BlockBackend *): Assertion `qemu_in_main_thread()' failed.

  #0 0x00007f4b6a0a154c __pthread_kill_implementation (libc.so.6>
  #1 0x00007f4b6a054d46 raise (libc.so.6 + 0x54d46)
  #2 0x00007f4b6a0287f3 abort (libc.so.6 + 0x287f3)
  #3 0x00007f4b6a02871b __assert_fail_base.cold (libc.so.6 + 0x2>
  #4 0x00007f4b6a04dce6 __assert_fail (libc.so.6 + 0x4dce6)
  #5 0x000055c811253ac0 blk_drain (qemu-kvm + 0x808ac0)
  #6 0x000055c810ecb951 scsi_device_purge_requests (qemu-kvm + 0>
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^
  #7 0x000055c810ed6ab8 scsi_disk_reset (qemu-kvm + 0x48bab8)
  #8 0x000055c8111dc908 resettable_phase_hold (qemu-kvm + 0x7919>
  #9 0x000055c8111db7f0 device_cold_reset (qemu-kvm + 0x7907f0)
  #10 0x000055c8110bb1a6 virtio_scsi_handle_ctrl (qemu-kvm + 0x67>
  #11 0x000055c8110d458f virtio_queue_host_notifier_read (qemu-kv>
  #12 0x000055c811404a73 aio_dispatch_handler (qemu-kvm + 0x9b9a7>
  #13 0x000055c811405743 aio_poll (qemu-kvm + 0x9ba743)
  #14 0x000055c81121d532 iothread_run (qemu-kvm + 0x7d2532)
  #15 0x000055c81140926a qemu_thread_start (qemu-kvm + 0x9be26a)
  #16 0x00007f4b6a09f802 start_thread (libc.so.6 + 0x9f802)
  #17 0x00007f4b6a03f450 __clone3 (libc.so.6 + 0x3f450)

Reported-by: Qing Wang <qinw...@redhat.com>
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2155748
Cc: Paolo Bonzini <pbonz...@redhat.com>
Cc: Kevin Wolf <kw...@redhat.com>
Cc: Emanuele Giuseppe Esposito <eespo...@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefa...@redhat.com>
---
 include/sysemu/block-backend-io.h | 1 +
 block/block-backend.c | 23 +++++++++++++++++++++++
hw/scsi/scsi-bus.c | 2 +- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h index 7ec6d978d4..f73db1f77c 100644 --- a/include/sysemu/block-backend-io.h +++ b/include/sysemu/block-backend-io.h @@ -73,6 +73,7 @@ void blk_iostatus_set_err(BlockBackend *blk, int error); int blk_get_max_iov(BlockBackend *blk); int blk_get_max_hw_iov(BlockBackend *blk); +void blk_drain_noref(BlockBackend *blk); void blk_io_plug(BlockBackend *blk); void blk_io_unplug(BlockBackend *blk); AioContext *blk_get_aio_context(BlockBackend *blk); diff --git a/block/block-backend.c b/block/block-backend.c index ba7bf1d6bc..20914bae7b 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1815,6 +1815,29 @@ void blk_drain(BlockBackend *blk) } } +/* + * Same as blk_drain() but the caller must hold a reference to blk. May be + * called from Global State or I/O API code. Device emulation can call this + * because the qdev drive= property holds the reference. + */ +void blk_drain_noref(BlockBackend *blk) +{ + BlockDriverState *bs = blk_bs(blk); + IO_OR_GS_CODE(); + + if (bs) { + bdrv_drained_begin(bs); + } + + /* We may have -ENOMEDIUM completions in flight */ + AIO_WAIT_WHILE(blk_get_aio_context(blk), + qatomic_mb_read(&blk->in_flight) > 0); + + if (bs) { + bdrv_drained_end(bs); + } +} + void blk_drain_all(void) { BlockBackend *blk = NULL; diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index ceceafb2cd..110fd85106 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -1663,7 +1663,7 @@ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) req = QTAILQ_FIRST(&sdev->requests); scsi_req_cancel_async(req, NULL); } - blk_drain(sdev->conf.blk); + blk_drain_noref(sdev->conf.blk); aio_context_release(blk_get_aio_context(sdev->conf.blk)); scsi_device_set_ua(sdev, sense); } -- 2.39.0