Re: [PATCH 1/5] virtio-blk: enforce iothread-vq-mapping validation
On Mon, 05 Feb 2024 19:26, Stefan Hajnoczi wrote: Hanna Czenczek noticed that the safety of `vq_aio_context[vq->value] = ctx;` with user-defined vq->value inputs is not obvious. The code is structured in validate() + apply() steps so input validation is there, but it happens way earlier and there is nothing that guarantees apply() can only be called with validated inputs. This patch moves the validate() call inside the apply() function so validation is guaranteed. I also added the bounds checking assertion that Hanna suggested. Signed-off-by: Stefan Hajnoczi --- hw/block/virtio-blk.c | 192 +++--- 1 file changed, 107 insertions(+), 85 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 227d83569f..e8b37fd5f4 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1485,6 +1485,72 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, return 0; } +static void virtio_resize_cb(void *opaque) +{ +VirtIODevice *vdev = opaque; + +assert(qemu_get_current_aio_context() == qemu_get_aio_context()); +virtio_notify_config(vdev); +} + +static void virtio_blk_resize(void *opaque) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(opaque); + +/* + * virtio_notify_config() needs to acquire the BQL, + * so it can't be called from an iothread. Instead, schedule + * it to be run in the main context BH. 
+ */ +aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); +} + +static void virtio_blk_ioeventfd_detach(VirtIOBlock *s) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(s); + +for (uint16_t i = 0; i < s->conf.num_queues; i++) { +VirtQueue *vq = virtio_get_queue(vdev, i); +virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); +} +} + +static void virtio_blk_ioeventfd_attach(VirtIOBlock *s) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(s); + +for (uint16_t i = 0; i < s->conf.num_queues; i++) { +VirtQueue *vq = virtio_get_queue(vdev, i); +virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); +} +} + +/* Suspend virtqueue ioeventfd processing during drain */ +static void virtio_blk_drained_begin(void *opaque) +{ +VirtIOBlock *s = opaque; + +if (s->ioeventfd_started) { +virtio_blk_ioeventfd_detach(s); +} +} + +/* Resume virtqueue ioeventfd processing after drain */ +static void virtio_blk_drained_end(void *opaque) +{ +VirtIOBlock *s = opaque; + +if (s->ioeventfd_started) { +virtio_blk_ioeventfd_attach(s); +} +} + +static const BlockDevOps virtio_block_ops = { +.resize_cb = virtio_blk_resize, +.drained_begin = virtio_blk_drained_begin, +.drained_end = virtio_blk_drained_end, +}; + static bool validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list, uint16_t num_queues, Error **errp) @@ -1547,81 +1613,33 @@ validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list, return true; } -static void virtio_resize_cb(void *opaque) -{ -VirtIODevice *vdev = opaque; - -assert(qemu_get_current_aio_context() == qemu_get_aio_context()); -virtio_notify_config(vdev); -} - -static void virtio_blk_resize(void *opaque) -{ -VirtIODevice *vdev = VIRTIO_DEVICE(opaque); - -/* - * virtio_notify_config() needs to acquire the BQL, - * so it can't be called from an iothread. Instead, schedule - * it to be run in the main context BH. 
- */ -aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); -} - -static void virtio_blk_ioeventfd_detach(VirtIOBlock *s) -{ -VirtIODevice *vdev = VIRTIO_DEVICE(s); - -for (uint16_t i = 0; i < s->conf.num_queues; i++) { -VirtQueue *vq = virtio_get_queue(vdev, i); -virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); -} -} - -static void virtio_blk_ioeventfd_attach(VirtIOBlock *s) -{ -VirtIODevice *vdev = VIRTIO_DEVICE(s); - -for (uint16_t i = 0; i < s->conf.num_queues; i++) { -VirtQueue *vq = virtio_get_queue(vdev, i); -virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); -} -} - -/* Suspend virtqueue ioeventfd processing during drain */ -static void virtio_blk_drained_begin(void *opaque) -{ -VirtIOBlock *s = opaque; - -if (s->ioeventfd_started) { -virtio_blk_ioeventfd_detach(s); -} -} - -/* Resume virtqueue ioeventfd processing after drain */ -static void virtio_blk_drained_end(void *opaque) -{ -VirtIOBlock *s = opaque; - -if (s->ioeventfd_started) { -virtio_blk_ioeventfd_attach(s); -} -} - -static const BlockDevOps virtio_block_ops = { -.resize_cb = virtio_blk_resize, -.drained_begin = virtio_blk_drained_begin, -.drained_end = virtio_blk_drained_end, -}; - -/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ -static void -apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, -
Re: [PATCH 5/5] monitor: use aio_co_reschedule_self()
On Mon, 05 Feb 2024 19:26, Stefan Hajnoczi wrote: The aio_co_reschedule_self() API is designed to avoid the race condition between scheduling the coroutine in another AioContext and yielding. The QMP dispatch code uses the open-coded version that appears susceptible to the race condition at first glance: aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self()); qemu_coroutine_yield(); The code is actually safe because the iohandler and qemu_aio_context AioContext run under the Big QEMU Lock. Nevertheless, set a good example and use aio_co_reschedule_self() so it's obvious that there is no race. Suggested-by: Hanna Reitz Signed-off-by: Stefan Hajnoczi --- qapi/qmp-dispatch.c | 7 ++- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c index 176b549473..f3488afeef 100644 --- a/qapi/qmp-dispatch.c +++ b/qapi/qmp-dispatch.c @@ -212,8 +212,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ * executing the command handler so that it can make progress if it * involves an AIO_WAIT_WHILE(). */ -aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self()); -qemu_coroutine_yield(); +aio_co_reschedule_self(qemu_get_aio_context()); } monitor_set_cur(qemu_coroutine_self(), cur_mon); @@ -227,9 +226,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ * Move back to iohandler_ctx so that nested event loops for * qemu_aio_context don't start new monitor commands. */ -aio_co_schedule(iohandler_get_aio_context(), -qemu_coroutine_self()); -qemu_coroutine_yield(); +aio_co_reschedule_self(iohandler_get_aio_context()); } } else { /* -- 2.43.0 Reviewed-by: Manos Pitsidianakis
Re: [PATCH 2/5] virtio-blk: clarify that there is at least 1 virtqueue
On Mon, 05 Feb 2024 19:26, Stefan Hajnoczi wrote: It is not possible to instantiate a virtio-blk device with 0 virtqueues. The following check is located in ->realize(): if (!conf->num_queues) { error_setg(errp, "num-queues property must be larger than 0"); return; } Later on we access s->vq_aio_context[0] under the assumption that there is at least one virtqueue. Hanna Czenczek noted that it would help to show that the array index is already valid. Add an assertion to document that s->vq_aio_context[0] is always safe...and catch future code changes that break this assumption. Suggested-by: Hanna Czenczek Signed-off-by: Stefan Hajnoczi --- hw/block/virtio-blk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index e8b37fd5f4..a0735a9bca 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1825,6 +1825,7 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) * Try to change the AioContext so that block jobs and other operations can * co-locate their activity in the same AioContext. If it fails, nevermind. */ +assert(nvqs > 0); /* enforced during ->realize() */ r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0], _err); if (r < 0) { -- 2.43.0 Reviewed-by: Manos Pitsidianakis
Re: [PATCH 3/5] virtio-blk: add vq_rq[] bounds check in virtio_blk_dma_restart_cb()
On Mon, 05 Feb 2024 19:26, Stefan Hajnoczi wrote: Hanna Czenczek noted that the array index in virtio_blk_dma_restart_cb() is not bounds-checked: g_autofree VirtIOBlockReq **vq_rq = g_new0(VirtIOBlockReq *, num_queues); ... while (rq) { VirtIOBlockReq *next = rq->next; uint16_t idx = virtio_get_queue_index(rq->vq); rq->next = vq_rq[idx]; ^^ The code is correct because both rq->vq and vq_rq[] depend on num_queues, but this is indirect and not 100% obvious. Add an assertion. This sentence could be useful as an inline comment too. Suggested-by: Hanna Czenczek Signed-off-by: Stefan Hajnoczi --- hw/block/virtio-blk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index a0735a9bca..f3193f4b75 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1209,6 +1209,7 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running, VirtIOBlockReq *next = rq->next; uint16_t idx = virtio_get_queue_index(rq->vq); +assert(idx < num_queues); rq->next = vq_rq[idx]; vq_rq[idx] = rq; rq = next; -- 2.43.0 Reviewed-by: Manos Pitsidianakis
Re: [PATCH 4/5] virtio-blk: declare VirtIOBlock::rq with a type
On Mon, 05 Feb 2024 19:26, Stefan Hajnoczi wrote: The VirtIOBlock::rq field has had the type void * since its introduction in commit 869a5c6df19a ("Stop VM on error in virtio-blk. (Gleb Natapov)"). Perhaps this was done to avoid the forward declaration of VirtIOBlockReq. Hanna Czenczek pointed out the missing type. Specify the actual type because there is no need to use void * here. Suggested-by: Hanna Czenczek Signed-off-by: Stefan Hajnoczi --- include/hw/virtio/virtio-blk.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 833a9a344f..5c14110c4b 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -55,7 +55,7 @@ struct VirtIOBlock { VirtIODevice parent_obj; BlockBackend *blk; QemuMutex rq_lock; -void *rq; /* protected by rq_lock */ +struct VirtIOBlockReq *rq; /* protected by rq_lock */ VirtIOBlkConf conf; unsigned short sector_mask; bool original_wce; -- 2.43.0 Reviewed-by: Manos Pitsidianakis
Re: [PATCH] vdpa-dev: Fix initialisation order to restore VDUSE compatibility
On Mon, Feb 5, 2024 at 6:51 PM Stefano Garzarella wrote: > > On Fri, Feb 02, 2024 at 02:25:21PM +0100, Kevin Wolf wrote: > >VDUSE requires that virtqueues are first enabled before the DRIVER_OK > >status flag is set; with the current API of the kernel module, it is > >impossible to enable the opposite order in our block export code because > >userspace is not notified when a virtqueue is enabled. Did this mean virtio-blk will enable a virtqueue after DRIVER_OK? Sepc is not clear about this and that's why we introduce VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK. > > Yeah, IMHO the VDUSE protocol is missing a VDUSE_SET_VQ_READY message, I think you meant when VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is negotiated. If this is truth, it seems a little more complicated, for example the get_backend_features needs to be forward to the userspace? This seems suboptimal to implement this in the spec first and then we can leverage the features. Or we can have another parameter for the ioctl that creates the vduse device. > I'll start another thread about that, but in the meantime I agree that > we should fix QEMU since we need to work properly with old kernels as > well. > > > > >This requirement also mathces the normal initialisation order as done by > >the generic vhost code in QEMU. However, commit 6c482547 accidentally > >changed the order for vdpa-dev and broke access to VDUSE devices with > >this. > > > >This changes vdpa-dev to use the normal order again and use the standard > >vhost callback .vhost_set_vring_enable for this. VDUSE devices can be > >used with vdpa-dev again after this fix. 
> > I like this approach and the patch LGTM, but I'm a bit worried about > this function in hw/net/vhost_net.c: > > int vhost_set_vring_enable(NetClientState *nc, int enable) > { > VHostNetState *net = get_vhost_net(nc); > const VhostOps *vhost_ops = net->dev.vhost_ops; > > nc->vring_enable = enable; > > if (vhost_ops && vhost_ops->vhost_set_vring_enable) { > return vhost_ops->vhost_set_vring_enable(>dev, enable); > } > > return 0; > } > > @Eugenio, @Jason, should we change some things there if vhost-vdpa > implements the vhost_set_vring_enable callback? Eugenio may know more, I remember we need to enable cvq first for shadow virtqueue to restore some states. > > Do you remember why we didn't implement it from the beginning? It seems the vrings parameter is introduced after vhost-vdpa is implemented. Thanks > > Thanks, > Stefano > > > > >Cc: qemu-sta...@nongnu.org > >Fixes: 6c4825476a4351530bcac17abab72295b75ffe98 > >Signed-off-by: Kevin Wolf > >--- > > hw/virtio/vdpa-dev.c | 5 + > > hw/virtio/vhost-vdpa.c | 17 + > > 2 files changed, 18 insertions(+), 4 deletions(-) > > > >diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c > >index eb9ecea83b..13e87f06f6 100644 > >--- a/hw/virtio/vdpa-dev.c > >+++ b/hw/virtio/vdpa-dev.c > >@@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, > >Error **errp) > > > > s->dev.acked_features = vdev->guest_features; > > > >-ret = vhost_dev_start(>dev, vdev, false); > >+ret = vhost_dev_start(>dev, vdev, true); > > if (ret < 0) { > > error_setg_errno(errp, -ret, "Error starting vhost"); > > goto err_guest_notifiers; > > } > >-for (i = 0; i < s->dev.nvqs; ++i) { > >-vhost_vdpa_set_vring_ready(>vdpa, i); > >-} > > s->started = true; > > > > /* > >diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c > >index 3a43beb312..c4574d56c5 100644 > >--- a/hw/virtio/vhost-vdpa.c > >+++ b/hw/virtio/vhost-vdpa.c > >@@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, > >unsigned idx) > > return 
r; > > } > > > >+static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable) > >+{ > >+struct vhost_vdpa *v = dev->opaque; > >+unsigned int i; > >+int ret; > >+ > >+for (i = 0; i < dev->nvqs; ++i) { > >+ret = vhost_vdpa_set_vring_ready(v, i); > >+if (ret < 0) { > >+return ret; > >+} > >+} > >+ > >+return 0; > >+} > >+ > > static int vhost_vdpa_set_config_call(struct vhost_dev *dev, > >int fd) > > { > >@@ -1524,6 +1540,7 @@ const VhostOps vdpa_ops = { > > .vhost_set_features = vhost_vdpa_set_features, > > .vhost_reset_device = vhost_vdpa_reset_device, > > .vhost_get_vq_index = vhost_vdpa_get_vq_index, > >+.vhost_set_vring_enable = vhost_vdpa_set_vring_enable, > > .vhost_get_config = vhost_vdpa_get_config, > > .vhost_set_config = vhost_vdpa_set_config, > > .vhost_requires_shm_log = NULL, > >-- > >2.43.0 > > >
Re: [PATCH] tests/cdrom-test: Add cdrom test for LoongArch virt machine
Hi Thomas, On 2024/2/5 下午3:47, Thomas Huth wrote: On 05/02/2024 03.13, Bibo Mao wrote: The cdrom test skips to execute on LoongArch system with command "make check" Are you sure the test is marked with "skip"? ... it should at least test with the "none" machine...? I check again, cdrom test case passes to run with "none" machine. And the root cause is that xorriso rpm package is missing so it is skipped. Regards Bibo Mao this patch enables cdrom test for LoongArch virt machine platform. With this patch, cdrom test passes to run on LoongArch virt machine type. Signed-off-by: Bibo Mao --- tests/qtest/cdrom-test.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c index 0945383789..c8b97d8d9a 100644 --- a/tests/qtest/cdrom-test.c +++ b/tests/qtest/cdrom-test.c @@ -271,6 +271,9 @@ int main(int argc, char **argv) const char *virtmachine[] = { "virt", NULL }; add_cdrom_param_tests(virtmachine); } + } else if (g_str_equal(arch, "loongarch64")) { + const char *virtmachine[] = { "virt", NULL }; + add_cdrom_param_tests(virtmachine); } else { const char *nonemachine[] = { "none", NULL }; add_cdrom_param_tests(nonemachine); Anyway, using the virt machine is certainly better than the "none" machine, so: Acked-by: Thomas Huth
Re: [PATCH] tests/cdrom-test: Add cdrom test for LoongArch virt machine
Hi Philippe, On 2024/2/5 下午8:58, Philippe Mathieu-Daudé wrote: Hi Bibo, On 5/2/24 03:13, Bibo Mao wrote: The cdrom test skips to execute on LoongArch system with command "make check", this patch enables cdrom test for LoongArch virt machine platform. With this patch, cdrom test passes to run on LoongArch virt machine type. Signed-off-by: Bibo Mao --- tests/qtest/cdrom-test.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c index 0945383789..c8b97d8d9a 100644 --- a/tests/qtest/cdrom-test.c +++ b/tests/qtest/cdrom-test.c @@ -271,6 +271,9 @@ int main(int argc, char **argv) const char *virtmachine[] = { "virt", NULL }; add_cdrom_param_tests(virtmachine); } + } else if (g_str_equal(arch, "loongarch64")) { + const char *virtmachine[] = { "virt", NULL }; + add_cdrom_param_tests(virtmachine); What is the default device used, virtio-blk-pci? yes, it is. For virt machine type, the default type for block device is virtio interface, and it is defined at function loongarch_class_init(). mc->block_default_type = IF_VIRTIO Regards Bibo Mao
[PATCH 3/5] virtio-blk: add vq_rq[] bounds check in virtio_blk_dma_restart_cb()
Hanna Czenczek noted that the array index in virtio_blk_dma_restart_cb() is not bounds-checked: g_autofree VirtIOBlockReq **vq_rq = g_new0(VirtIOBlockReq *, num_queues); ... while (rq) { VirtIOBlockReq *next = rq->next; uint16_t idx = virtio_get_queue_index(rq->vq); rq->next = vq_rq[idx]; ^^ The code is correct because both rq->vq and vq_rq[] depend on num_queues, but this is indirect and not 100% obvious. Add an assertion. Suggested-by: Hanna Czenczek Signed-off-by: Stefan Hajnoczi --- hw/block/virtio-blk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index a0735a9bca..f3193f4b75 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1209,6 +1209,7 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running, VirtIOBlockReq *next = rq->next; uint16_t idx = virtio_get_queue_index(rq->vq); +assert(idx < num_queues); rq->next = vq_rq[idx]; vq_rq[idx] = rq; rq = next; -- 2.43.0
[PATCH 4/5] virtio-blk: declare VirtIOBlock::rq with a type
The VirtIOBlock::rq field has had the type void * since its introduction in commit 869a5c6df19a ("Stop VM on error in virtio-blk. (Gleb Natapov)"). Perhaps this was done to avoid the forward declaration of VirtIOBlockReq. Hanna Czenczek pointed out the missing type. Specify the actual type because there is no need to use void * here. Suggested-by: Hanna Czenczek Signed-off-by: Stefan Hajnoczi --- include/hw/virtio/virtio-blk.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 833a9a344f..5c14110c4b 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -55,7 +55,7 @@ struct VirtIOBlock { VirtIODevice parent_obj; BlockBackend *blk; QemuMutex rq_lock; -void *rq; /* protected by rq_lock */ +struct VirtIOBlockReq *rq; /* protected by rq_lock */ VirtIOBlkConf conf; unsigned short sector_mask; bool original_wce; -- 2.43.0
[PATCH 0/5] virtio-blk: iothread-vq-mapping cleanups
Hanna reviewed the iothread-vq-mapping patches after they were applied to qemu.git. This series consists of code cleanups that Hanna identified. There are no functional changes or bug fixes that need to be backported to the stable tree here, but it may make sense to backport them in the future to avoid conflicts. Stefan Hajnoczi (5): virtio-blk: enforce iothread-vq-mapping validation virtio-blk: clarify that there is at least 1 virtqueue virtio-blk: add vq_rq[] bounds check in virtio_blk_dma_restart_cb() virtio-blk: declare VirtIOBlock::rq with a type monitor: use aio_co_reschedule_self() include/hw/virtio/virtio-blk.h | 2 +- hw/block/virtio-blk.c | 194 ++--- qapi/qmp-dispatch.c| 7 +- 3 files changed, 112 insertions(+), 91 deletions(-) -- 2.43.0
[PATCH 2/5] virtio-blk: clarify that there is at least 1 virtqueue
It is not possible to instantiate a virtio-blk device with 0 virtqueues. The following check is located in ->realize(): if (!conf->num_queues) { error_setg(errp, "num-queues property must be larger than 0"); return; } Later on we access s->vq_aio_context[0] under the assumption that there is at least one virtqueue. Hanna Czenczek noted that it would help to show that the array index is already valid. Add an assertion to document that s->vq_aio_context[0] is always safe...and catch future code changes that break this assumption. Suggested-by: Hanna Czenczek Signed-off-by: Stefan Hajnoczi --- hw/block/virtio-blk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index e8b37fd5f4..a0735a9bca 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1825,6 +1825,7 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) * Try to change the AioContext so that block jobs and other operations can * co-locate their activity in the same AioContext. If it fails, nevermind. */ +assert(nvqs > 0); /* enforced during ->realize() */ r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0], _err); if (r < 0) { -- 2.43.0
[PATCH 1/5] virtio-blk: enforce iothread-vq-mapping validation
Hanna Czenczek noticed that the safety of `vq_aio_context[vq->value] = ctx;` with user-defined vq->value inputs is not obvious. The code is structured in validate() + apply() steps so input validation is there, but it happens way earlier and there is nothing that guarantees apply() can only be called with validated inputs. This patch moves the validate() call inside the apply() function so validation is guaranteed. I also added the bounds checking assertion that Hanna suggested. Signed-off-by: Stefan Hajnoczi --- hw/block/virtio-blk.c | 192 +++--- 1 file changed, 107 insertions(+), 85 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 227d83569f..e8b37fd5f4 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1485,6 +1485,72 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, return 0; } +static void virtio_resize_cb(void *opaque) +{ +VirtIODevice *vdev = opaque; + +assert(qemu_get_current_aio_context() == qemu_get_aio_context()); +virtio_notify_config(vdev); +} + +static void virtio_blk_resize(void *opaque) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(opaque); + +/* + * virtio_notify_config() needs to acquire the BQL, + * so it can't be called from an iothread. Instead, schedule + * it to be run in the main context BH. 
+ */ +aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); +} + +static void virtio_blk_ioeventfd_detach(VirtIOBlock *s) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(s); + +for (uint16_t i = 0; i < s->conf.num_queues; i++) { +VirtQueue *vq = virtio_get_queue(vdev, i); +virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); +} +} + +static void virtio_blk_ioeventfd_attach(VirtIOBlock *s) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(s); + +for (uint16_t i = 0; i < s->conf.num_queues; i++) { +VirtQueue *vq = virtio_get_queue(vdev, i); +virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); +} +} + +/* Suspend virtqueue ioeventfd processing during drain */ +static void virtio_blk_drained_begin(void *opaque) +{ +VirtIOBlock *s = opaque; + +if (s->ioeventfd_started) { +virtio_blk_ioeventfd_detach(s); +} +} + +/* Resume virtqueue ioeventfd processing after drain */ +static void virtio_blk_drained_end(void *opaque) +{ +VirtIOBlock *s = opaque; + +if (s->ioeventfd_started) { +virtio_blk_ioeventfd_attach(s); +} +} + +static const BlockDevOps virtio_block_ops = { +.resize_cb = virtio_blk_resize, +.drained_begin = virtio_blk_drained_begin, +.drained_end = virtio_blk_drained_end, +}; + static bool validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list, uint16_t num_queues, Error **errp) @@ -1547,81 +1613,33 @@ validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list, return true; } -static void virtio_resize_cb(void *opaque) -{ -VirtIODevice *vdev = opaque; - -assert(qemu_get_current_aio_context() == qemu_get_aio_context()); -virtio_notify_config(vdev); -} - -static void virtio_blk_resize(void *opaque) -{ -VirtIODevice *vdev = VIRTIO_DEVICE(opaque); - -/* - * virtio_notify_config() needs to acquire the BQL, - * so it can't be called from an iothread. Instead, schedule - * it to be run in the main context BH. 
- */ -aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); -} - -static void virtio_blk_ioeventfd_detach(VirtIOBlock *s) -{ -VirtIODevice *vdev = VIRTIO_DEVICE(s); - -for (uint16_t i = 0; i < s->conf.num_queues; i++) { -VirtQueue *vq = virtio_get_queue(vdev, i); -virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); -} -} - -static void virtio_blk_ioeventfd_attach(VirtIOBlock *s) -{ -VirtIODevice *vdev = VIRTIO_DEVICE(s); - -for (uint16_t i = 0; i < s->conf.num_queues; i++) { -VirtQueue *vq = virtio_get_queue(vdev, i); -virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); -} -} - -/* Suspend virtqueue ioeventfd processing during drain */ -static void virtio_blk_drained_begin(void *opaque) -{ -VirtIOBlock *s = opaque; - -if (s->ioeventfd_started) { -virtio_blk_ioeventfd_detach(s); -} -} - -/* Resume virtqueue ioeventfd processing after drain */ -static void virtio_blk_drained_end(void *opaque) -{ -VirtIOBlock *s = opaque; - -if (s->ioeventfd_started) { -virtio_blk_ioeventfd_attach(s); -} -} - -static const BlockDevOps virtio_block_ops = { -.resize_cb = virtio_blk_resize, -.drained_begin = virtio_blk_drained_begin, -.drained_end = virtio_blk_drained_end, -}; - -/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ -static void -apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, - AioContext **vq_aio_context, uint16_t
[PATCH 5/5] monitor: use aio_co_reschedule_self()
The aio_co_reschedule_self() API is designed to avoid the race condition between scheduling the coroutine in another AioContext and yielding. The QMP dispatch code uses the open-coded version that appears susceptible to the race condition at first glance: aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self()); qemu_coroutine_yield(); The code is actually safe because the iohandler and qemu_aio_context AioContext run under the Big QEMU Lock. Nevertheless, set a good example and use aio_co_reschedule_self() so it's obvious that there is no race. Suggested-by: Hanna Reitz Signed-off-by: Stefan Hajnoczi --- qapi/qmp-dispatch.c | 7 ++- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c index 176b549473..f3488afeef 100644 --- a/qapi/qmp-dispatch.c +++ b/qapi/qmp-dispatch.c @@ -212,8 +212,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ * executing the command handler so that it can make progress if it * involves an AIO_WAIT_WHILE(). */ -aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self()); -qemu_coroutine_yield(); +aio_co_reschedule_self(qemu_get_aio_context()); } monitor_set_cur(qemu_coroutine_self(), cur_mon); @@ -227,9 +226,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ * Move back to iohandler_ctx so that nested event loops for * qemu_aio_context don't start new monitor commands. */ -aio_co_schedule(iohandler_get_aio_context(), -qemu_coroutine_self()); -qemu_coroutine_yield(); +aio_co_reschedule_self(iohandler_get_aio_context()); } } else { /* -- 2.43.0
Re: [PATCH] iotests: give tempdir an identifying name
05.02.2024 18:51, Daniel P. Berrangé wrote: If something goes wrong causing the iotests not to cleanup their temporary directory, it is useful if the dir had an identifying name to show what is to blame. Signed-off-by: Daniel P. Berrangé Reviewed-by: Michael Tokarev Thank you again for the quick good work! /mjt tests/qemu-iotests/testenv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py index 3ff38f2661..588f30a4f1 100644 --- a/tests/qemu-iotests/testenv.py +++ b/tests/qemu-iotests/testenv.py @@ -126,7 +126,7 @@ def init_directories(self) -> None: self.tmp_sock_dir = False Path(self.sock_dir).mkdir(parents=True, exist_ok=True) except KeyError: -self.sock_dir = tempfile.mkdtemp() +self.sock_dir = tempfile.mkdtemp(prefix="qemu-iotests-") self.tmp_sock_dir = True self.sample_img_dir = os.getenv('SAMPLE_IMG_DIR',
Re: [PATCH] iotests: fix leak of tmpdir in dry-run mode
05.02.2024 18:40, Daniel P. Berrangé : Creating an instance of the 'TestEnv' class will create a temporary directory. This dir is only deleted, however, in the __exit__ handler invoked by a context manager. In dry-run mode, we don't use the TestEnv via a context manager, so were leaking the temporary directory. Since meson invokes 'check' 5 times on each configure run, developers /tmp was filling up with empty temporary directories. Signed-off-by: Daniel P. Berrangé --- tests/qemu-iotests/check | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index f2e9d27dcf..56d88ca423 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -184,7 +184,8 @@ if __name__ == '__main__': sys.exit(str(e)) if args.dry_run: -print('\n'.join([os.path.basename(t) for t in tests])) +with env: +print('\n'.join([os.path.basename(t) for t in tests])) else: with TestRunner(env, tap=args.tap, color=args.color) as tr: Reviewed-by: Michael Tokarev (with my limited understanding of this code) Thanks! /mjt
[PATCH] iotests: give tempdir an identifying name
If something goes wrong causing the iotests not to cleanup their temporary directory, it is useful if the dir had an identifying name to show what is to blame. Signed-off-by: Daniel P. Berrangé --- tests/qemu-iotests/testenv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py index 3ff38f2661..588f30a4f1 100644 --- a/tests/qemu-iotests/testenv.py +++ b/tests/qemu-iotests/testenv.py @@ -126,7 +126,7 @@ def init_directories(self) -> None: self.tmp_sock_dir = False Path(self.sock_dir).mkdir(parents=True, exist_ok=True) except KeyError: -self.sock_dir = tempfile.mkdtemp() +self.sock_dir = tempfile.mkdtemp(prefix="qemu-iotests-") self.tmp_sock_dir = True self.sample_img_dir = os.getenv('SAMPLE_IMG_DIR', -- 2.43.0
Re: [PATCH] iotests: fix leak of tmpdir in dry-run mode
On Mon, 5 Feb 2024 at 15:41, Daniel P. Berrangé wrote: > > Creating an instance of the 'TestEnv' class will create a temporary > directory. This dir is only deleted, however, in the __exit__ handler > invoked by a context manager. > > In dry-run mode, we don't use the TestEnv via a context manager, so > were leaking the temporary directory. Since meson invokes 'check' > 5 times on each configure run, developers /tmp was filling up with > empty temporary directories. > > Signed-off-by: Daniel P. Berrangé Could we also arrange for the temp directory to be created with a name that makes it easy to identify what has created it? Very generic names like "/tmp/tmp" are very hard to pin down to the code that created them. thanks -- PMM
[PATCH] iotests: fix leak of tmpdir in dry-run mode
Creating an instance of the 'TestEnv' class will create a temporary directory. This dir is only deleted, however, in the __exit__ handler invoked by a context manager. In dry-run mode, we don't use the TestEnv via a context manager, so we were leaking the temporary directory. Since meson invokes 'check' 5 times on each configure run, developers' /tmp was filling up with empty temporary directories. Signed-off-by: Daniel P. Berrangé --- tests/qemu-iotests/check | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index f2e9d27dcf..56d88ca423 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -184,7 +184,8 @@ if __name__ == '__main__': sys.exit(str(e)) if args.dry_run: -print('\n'.join([os.path.basename(t) for t in tests])) +with env: +print('\n'.join([os.path.basename(t) for t in tests])) else: with TestRunner(env, tap=args.tap, color=args.color) as tr: -- 2.43.0
Re: [PATCH] vdpa-dev: Fix initialisation order to restore VDUSE compatibility
Am 05.02.2024 um 13:22 hat Eugenio Perez Martin geschrieben: > On Fri, Feb 2, 2024 at 2:25 PM Kevin Wolf wrote: > > > > VDUSE requires that virtqueues are first enabled before the DRIVER_OK > > status flag is set; with the current API of the kernel module, it is > > impossible to enable the opposite order in our block export code because > > userspace is not notified when a virtqueue is enabled. > > > > This requirement also mathces the normal initialisation order as done by > > the generic vhost code in QEMU. However, commit 6c482547 accidentally > > changed the order for vdpa-dev and broke access to VDUSE devices with > > this. > > > > This changes vdpa-dev to use the normal order again and use the standard > > vhost callback .vhost_set_vring_enable for this. VDUSE devices can be > > used with vdpa-dev again after this fix. > > > > Cc: qemu-sta...@nongnu.org > > Fixes: 6c4825476a4351530bcac17abab72295b75ffe98 > > Signed-off-by: Kevin Wolf > > --- > > hw/virtio/vdpa-dev.c | 5 + > > hw/virtio/vhost-vdpa.c | 17 + > > 2 files changed, 18 insertions(+), 4 deletions(-) > > > > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c > > index eb9ecea83b..13e87f06f6 100644 > > --- a/hw/virtio/vdpa-dev.c > > +++ b/hw/virtio/vdpa-dev.c > > @@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice > > *vdev, Error **errp) > > > > s->dev.acked_features = vdev->guest_features; > > > > -ret = vhost_dev_start(>dev, vdev, false); > > +ret = vhost_dev_start(>dev, vdev, true); > > if (ret < 0) { > > error_setg_errno(errp, -ret, "Error starting vhost"); > > goto err_guest_notifiers; > > } > > -for (i = 0; i < s->dev.nvqs; ++i) { > > -vhost_vdpa_set_vring_ready(>vdpa, i); > > -} > > s->started = true; > > > > /* > > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c > > index 3a43beb312..c4574d56c5 100644 > > --- a/hw/virtio/vhost-vdpa.c > > +++ b/hw/virtio/vhost-vdpa.c > > @@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, > > unsigned 
idx) > > return r; > > } > > > > +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable) > > +{ > > +struct vhost_vdpa *v = dev->opaque; > > +unsigned int i; > > +int ret; > > + > > +for (i = 0; i < dev->nvqs; ++i) { > > +ret = vhost_vdpa_set_vring_ready(v, i); > > +if (ret < 0) { > > +return ret; > > +} > > +} > > + > > +return 0; > > +} > > + > > static int vhost_vdpa_set_config_call(struct vhost_dev *dev, > > int fd) > > { > > @@ -1524,6 +1540,7 @@ const VhostOps vdpa_ops = { > > .vhost_set_features = vhost_vdpa_set_features, > > .vhost_reset_device = vhost_vdpa_reset_device, > > .vhost_get_vq_index = vhost_vdpa_get_vq_index, > > +.vhost_set_vring_enable = vhost_vdpa_set_vring_enable, > > .vhost_get_config = vhost_vdpa_get_config, > > .vhost_set_config = vhost_vdpa_set_config, > > .vhost_requires_shm_log = NULL, > > vhost-vdpa net enables CVQ before dataplane ones to configure all the > device in the destination of a live migration. To go back again to > this callback would cause the device to enable the dataplane before > virtqueues are configured again. Not that it makes a difference, but I don't think it would actually be going back. Even before your commit 6c482547, we were not making use of the generic callback but just called the function in a slightly different place (but less different than after commit 6c482547). But anyway... Why don't the other vhost backend need the same for vhost-net then? Do they just not support live migration? I don't know the code well enough to say where the problem is, but if vhost-vdpa networking code relies on the usual vhost operations not being implemented and bypasses VhostOps to replace it, that sounds like a design problem to me. Maybe VhostOps needs a new operation to enable just a single virtqueue that can be used by the networking code instead? > How does VDUSE userspace knows how many queues should enable? 
Can't > the kernel perform the necessary actions after DRIVER_OK, like > configuring the kick etc? Not sure if I understand the question. The vdpa kernel interface always enables individual queues, so the VDUSE userspace will enable whatever queues it was asked to enable. The only restriction is that the queues need to be enabled before setting DRIVER_OK. The interface that enables all virtqueues at once seems to be just .vhost_set_vring_enable in QEMU. Kevin
Re: NVME hotplug support ?
On 1/29/24 16:35, Hannes Reinecke wrote: On 1/29/24 14:13, Damien Hedde wrote: On 1/24/24 08:47, Hannes Reinecke wrote: On 1/24/24 07:52, Philippe Mathieu-Daudé wrote: Hi Hannes, [+Markus as QOM/QDev rubber duck] On 23/1/24 13:40, Hannes Reinecke wrote: On 1/23/24 11:59, Damien Hedde wrote: Hi all, We are currently looking into hotplugging nvme devices and it is currently not possible: When nvme was introduced 2 years ago, the feature was disabled. commit cc6fb6bc506e6c47ed604fcb7b7413dff0b7d845 Author: Klaus Jensen Date: Tue Jul 6 10:48:40 2021 +0200 hw/nvme: mark nvme-subsys non-hotpluggable We currently lack the infrastructure to handle subsystem hotplugging, so disable it. Do someone know what's lacking or anyone have some tips/idea of what we should develop to add the support ? Problem is that the object model is messed up. In qemu namespaces are attached to controllers, which in turn are children of the PCI device. There are subsystems, but these just reference the controller. So if you hotunplug the PCI device you detach/destroy the controller and detach the namespaces from the controller. But if you hotplug the PCI device again the NVMe controller will be attached to the PCI device, but the namespace are still be detached. Klaus said he was going to fix that, and I dimly remember some patches floating around. But apparently it never went anywhere. Fundamental problem is that the NVMe hierarchy as per spec is incompatible with the qemu object model; qemu requires a strict tree model where every object has exactly _one_ parent. The modelling problem is not clear to me. Do you have an example of how the NVMe hierarchy should be? Sure. As per NVMe spec we have this hierarchy: ---> subsys --- | | | V controller namespaces There can be several controllers, and several namespaces. The initiator (ie the linux 'nvme' driver) connects to a controller, queries the subsystem for the attached namespaces, and presents each namespace as a block device. 
For Qemu we have the problem that every device _must_ be a direct descendant of the parent (expressed by the fact that each 'parent' object is embedded in the device object). So if we were to present a NVMe PCI device, the controller must be derived from the PCI device: pci -> controller but now we have to express the NVMe hierarchy, too: pci -> ctrl1 -> subsys1 -> namespace1 which actually works. We can easily attach several namespaces: pci -> ctrl1 ->subsys1 -> namespace2 For a single controller and a single subsystem. However, as mentioned above, there can be _several_ controllers attached to the same subsystem. So we can express the second controller: pci -> ctrl2 but we cannot attach the controller to 'subsys1' as then 'subsys1' would need to be derived from 'ctrl2', and not (as it is now) from 'ctrl1'. The most logical step would be to have 'subsystems' their own entity, independent of any controllers. But then the block devices (which are derived from the namespaces) could not be traced back to the PCI device, and a PCI hotplug would not 'automatically' disconnect the nvme block devices. Plus the subsystem would be independent from the NVMe PCI devices, so you could have a subsystem with no controllers attached. And one would wonder who should be responsible for cleaning up that. Thanks for the details ! My use case is the simple one with no nvme subsystem/namespaces: - hotplug a pci nvme device (nvme controller) as in the following CLI (which automatically put the drive into a default namespace) ./qemu-system-aarch64 -nographic -M virt \ -drive file=nvme0.disk,if=none,id=nvme-drive0 \ -device nvme,serial=nvme0,id=nvmedev0,drive=nvme-drive0 In the simple tree approach where subsystems and namespaces are not shared by controllers. We could delete the whole nvme hiearchy under the controller while unplugging it ? 
In your first message, you said > So if you hotunplug the PCI device you detach/destroy the controller > and detach the namespaces from the controller. > But if you hotplug the PCI device again the NVMe controller will be > attached to the PCI device, but the namespace are still be detached. Do you mean that if we unplug the pci device we HAVE to keep some nvme objects so that if we plug the device back we can recover them ? Or just that it's hard to unplug nvme objects if they are not real qom children of pci device ? Key point for trying on PCI hotplug with qemu is to attach the PCI device to its own PCI root port. Cf the mail from Klaus Jensen for details. Cheers, Hannes Thanks a lot from both of you. I missed that. Damien
Re: [PATCH] vdpa-dev: Fix initialisation order to restore VDUSE compatibility
Am 02.02.2024 um 14:25 hat Kevin Wolf geschrieben: > VDUSE requires that virtqueues are first enabled before the DRIVER_OK > status flag is set; with the current API of the kernel module, it is > impossible to enable the opposite order in our block export code because > userspace is not notified when a virtqueue is enabled. > > This requirement also mathces the normal initialisation order as done by > the generic vhost code in QEMU. However, commit 6c482547 accidentally > changed the order for vdpa-dev and broke access to VDUSE devices with > this. > > This changes vdpa-dev to use the normal order again and use the standard > vhost callback .vhost_set_vring_enable for this. VDUSE devices can be > used with vdpa-dev again after this fix. > > Cc: qemu-sta...@nongnu.org > Fixes: 6c4825476a4351530bcac17abab72295b75ffe98 > Signed-off-by: Kevin Wolf > --- > hw/virtio/vdpa-dev.c | 5 + > hw/virtio/vhost-vdpa.c | 17 + > 2 files changed, 18 insertions(+), 4 deletions(-) > > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c > index eb9ecea83b..13e87f06f6 100644 > --- a/hw/virtio/vdpa-dev.c > +++ b/hw/virtio/vdpa-dev.c > @@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, > Error **errp) > > s->dev.acked_features = vdev->guest_features; > > -ret = vhost_dev_start(>dev, vdev, false); > +ret = vhost_dev_start(>dev, vdev, true); > if (ret < 0) { > error_setg_errno(errp, -ret, "Error starting vhost"); > goto err_guest_notifiers; > } > -for (i = 0; i < s->dev.nvqs; ++i) { > -vhost_vdpa_set_vring_ready(>vdpa, i); > -} > s->started = true; > > /* > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c > index 3a43beb312..c4574d56c5 100644 > --- a/hw/virtio/vhost-vdpa.c > +++ b/hw/virtio/vhost-vdpa.c > @@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, > unsigned idx) > return r; > } > > +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable) > +{ > +struct vhost_vdpa *v = dev->opaque; > +unsigned int i; 
> +int ret; > + > +for (i = 0; i < dev->nvqs; ++i) { > +ret = vhost_vdpa_set_vring_ready(v, i); > +if (ret < 0) { > +return ret; > +} > +} > + > +return 0; > +} Oops, this forgets to actually use the @enable parameter, and always enables the queue even if the caller wanted to disable it. I'll fix this in a v2, but I'd first like to see if something has to change to address Stefano's concern, too. Kevin
Re: [PATCH] tests/cdrom-test: Add cdrom test for LoongArch virt machine
Hi Bibo, On 5/2/24 03:13, Bibo Mao wrote: The cdrom test skips to execute on LoongArch system with command "make check", this patch enables cdrom test for LoongArch virt machine platform. With this patch, cdrom test passes to run on LoongArch virt machine type. Signed-off-by: Bibo Mao --- tests/qtest/cdrom-test.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c index 0945383789..c8b97d8d9a 100644 --- a/tests/qtest/cdrom-test.c +++ b/tests/qtest/cdrom-test.c @@ -271,6 +271,9 @@ int main(int argc, char **argv) const char *virtmachine[] = { "virt", NULL }; add_cdrom_param_tests(virtmachine); } +} else if (g_str_equal(arch, "loongarch64")) { +const char *virtmachine[] = { "virt", NULL }; +add_cdrom_param_tests(virtmachine); What is the default device used, virtio-blk-pci?
Re: [PATCH] vdpa-dev: Fix initialisation order to restore VDUSE compatibility
On Fri, Feb 2, 2024 at 2:25 PM Kevin Wolf wrote: > > VDUSE requires that virtqueues are first enabled before the DRIVER_OK > status flag is set; with the current API of the kernel module, it is > impossible to enable the opposite order in our block export code because > userspace is not notified when a virtqueue is enabled. > > This requirement also mathces the normal initialisation order as done by > the generic vhost code in QEMU. However, commit 6c482547 accidentally > changed the order for vdpa-dev and broke access to VDUSE devices with > this. > > This changes vdpa-dev to use the normal order again and use the standard > vhost callback .vhost_set_vring_enable for this. VDUSE devices can be > used with vdpa-dev again after this fix. > > Cc: qemu-sta...@nongnu.org > Fixes: 6c4825476a4351530bcac17abab72295b75ffe98 > Signed-off-by: Kevin Wolf > --- > hw/virtio/vdpa-dev.c | 5 + > hw/virtio/vhost-vdpa.c | 17 + > 2 files changed, 18 insertions(+), 4 deletions(-) > > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c > index eb9ecea83b..13e87f06f6 100644 > --- a/hw/virtio/vdpa-dev.c > +++ b/hw/virtio/vdpa-dev.c > @@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, > Error **errp) > > s->dev.acked_features = vdev->guest_features; > > -ret = vhost_dev_start(>dev, vdev, false); > +ret = vhost_dev_start(>dev, vdev, true); > if (ret < 0) { > error_setg_errno(errp, -ret, "Error starting vhost"); > goto err_guest_notifiers; > } > -for (i = 0; i < s->dev.nvqs; ++i) { > -vhost_vdpa_set_vring_ready(>vdpa, i); > -} > s->started = true; > > /* > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c > index 3a43beb312..c4574d56c5 100644 > --- a/hw/virtio/vhost-vdpa.c > +++ b/hw/virtio/vhost-vdpa.c > @@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, > unsigned idx) > return r; > } > > +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable) > +{ > +struct vhost_vdpa *v = dev->opaque; > +unsigned int i; 
> +int ret; > + > +for (i = 0; i < dev->nvqs; ++i) { > +ret = vhost_vdpa_set_vring_ready(v, i); > +if (ret < 0) { > +return ret; > +} > +} > + > +return 0; > +} > + > static int vhost_vdpa_set_config_call(struct vhost_dev *dev, > int fd) > { > @@ -1524,6 +1540,7 @@ const VhostOps vdpa_ops = { > .vhost_set_features = vhost_vdpa_set_features, > .vhost_reset_device = vhost_vdpa_reset_device, > .vhost_get_vq_index = vhost_vdpa_get_vq_index, > +.vhost_set_vring_enable = vhost_vdpa_set_vring_enable, > .vhost_get_config = vhost_vdpa_get_config, > .vhost_set_config = vhost_vdpa_set_config, > .vhost_requires_shm_log = NULL, > -- > 2.43.0 > vhost-vdpa net enables CVQ before dataplane ones to configure all the device in the destination of a live migration. To go back again to this callback would cause the device to enable the dataplane before virtqueues are configured again. How does VDUSE userspace knows how many queues should enable? Can't the kernel perform the necessary actions after DRIVER_OK, like configuring the kick etc? Thanks!
Re: [PATCH] vdpa-dev: Fix initialisation order to restore VDUSE compatibility
On Fri, Feb 02, 2024 at 02:25:21PM +0100, Kevin Wolf wrote: VDUSE requires that virtqueues are first enabled before the DRIVER_OK status flag is set; with the current API of the kernel module, it is impossible to enable the opposite order in our block export code because userspace is not notified when a virtqueue is enabled. Yeah, IMHO the VDUSE protocol is missing a VDUSE_SET_VQ_READY message, I'll start another thread about that, but in the meantime I agree that we should fix QEMU since we need to work properly with old kernels as well. This requirement also matches the normal initialisation order as done by the generic vhost code in QEMU. However, commit 6c482547 accidentally changed the order for vdpa-dev and broke access to VDUSE devices with this. This changes vdpa-dev to use the normal order again and use the standard vhost callback .vhost_set_vring_enable for this. VDUSE devices can be used with vdpa-dev again after this fix. I like this approach and the patch LGTM, but I'm a bit worried about this function in hw/net/vhost_net.c: int vhost_set_vring_enable(NetClientState *nc, int enable) { VHostNetState *net = get_vhost_net(nc); const VhostOps *vhost_ops = net->dev.vhost_ops; nc->vring_enable = enable; if (vhost_ops && vhost_ops->vhost_set_vring_enable) { return vhost_ops->vhost_set_vring_enable(>dev, enable); } return 0; } @Eugenio, @Jason, should we change some things there if vhost-vdpa implements the vhost_set_vring_enable callback? Do you remember why we didn't implement it from the beginning? 
Thanks, Stefano Cc: qemu-sta...@nongnu.org Fixes: 6c4825476a4351530bcac17abab72295b75ffe98 Signed-off-by: Kevin Wolf --- hw/virtio/vdpa-dev.c | 5 + hw/virtio/vhost-vdpa.c | 17 + 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index eb9ecea83b..13e87f06f6 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp) s->dev.acked_features = vdev->guest_features; -ret = vhost_dev_start(>dev, vdev, false); +ret = vhost_dev_start(>dev, vdev, true); if (ret < 0) { error_setg_errno(errp, -ret, "Error starting vhost"); goto err_guest_notifiers; } -for (i = 0; i < s->dev.nvqs; ++i) { -vhost_vdpa_set_vring_ready(>vdpa, i); -} s->started = true; /* diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 3a43beb312..c4574d56c5 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx) return r; } +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable) +{ +struct vhost_vdpa *v = dev->opaque; +unsigned int i; +int ret; + +for (i = 0; i < dev->nvqs; ++i) { +ret = vhost_vdpa_set_vring_ready(v, i); +if (ret < 0) { +return ret; +} +} + +return 0; +} + static int vhost_vdpa_set_config_call(struct vhost_dev *dev, int fd) { @@ -1524,6 +1540,7 @@ const VhostOps vdpa_ops = { .vhost_set_features = vhost_vdpa_set_features, .vhost_reset_device = vhost_vdpa_reset_device, .vhost_get_vq_index = vhost_vdpa_get_vq_index, +.vhost_set_vring_enable = vhost_vdpa_set_vring_enable, .vhost_get_config = vhost_vdpa_get_config, .vhost_set_config = vhost_vdpa_set_config, .vhost_requires_shm_log = NULL, -- 2.43.0
Re: [PATCH] tests/cdrom-test: Add cdrom test for LoongArch virt machine
On 2024/2/5 下午3:47, Thomas Huth wrote: On 05/02/2024 03.13, Bibo Mao wrote: The cdrom test skips to execute on LoongArch system with command "make check" Are you sure the test is marked with "skip"? ... it should at least test with the "none" machine...? With the latest code, cdrom testcase passes to run. It is strange that about two weeks ago, the result displays SKIP, so I post this patch -:) Regards Bibo Mao this patch enables cdrom test for LoongArch virt machine platform. With this patch, cdrom test passes to run on LoongArch virt machine type. Signed-off-by: Bibo Mao --- tests/qtest/cdrom-test.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c index 0945383789..c8b97d8d9a 100644 --- a/tests/qtest/cdrom-test.c +++ b/tests/qtest/cdrom-test.c @@ -271,6 +271,9 @@ int main(int argc, char **argv) const char *virtmachine[] = { "virt", NULL }; add_cdrom_param_tests(virtmachine); } + } else if (g_str_equal(arch, "loongarch64")) { + const char *virtmachine[] = { "virt", NULL }; + add_cdrom_param_tests(virtmachine); } else { const char *nonemachine[] = { "none", NULL }; add_cdrom_param_tests(nonemachine); Anyway, using the virt machine is certainly better than the "none" machine, so: Acked-by: Thomas Huth