Re: [PATCH 1/5] virtio-blk: enforce iothread-vq-mapping validation

2024-02-05 Thread Manos Pitsidianakis

On Mon, 05 Feb 2024 19:26, Stefan Hajnoczi  wrote:

Hanna Czenczek  noticed that the safety of
`vq_aio_context[vq->value] = ctx;` with user-defined vq->value inputs is
not obvious.

The code is structured in validate() + apply() steps so input validation
is there, but it happens way earlier and there is nothing that
guarantees apply() can only be called with validated inputs.

This patch moves the validate() call inside the apply() function so
validation is guaranteed. I also added the bounds checking assertion
that Hanna suggested.

Signed-off-by: Stefan Hajnoczi 
---
hw/block/virtio-blk.c | 192 +++---
1 file changed, 107 insertions(+), 85 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 227d83569f..e8b37fd5f4 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1485,6 +1485,72 @@ static int virtio_blk_load_device(VirtIODevice *vdev, 
QEMUFile *f,
return 0;
}

+static void virtio_resize_cb(void *opaque)
+{
+VirtIODevice *vdev = opaque;
+
+assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+virtio_notify_config(vdev);
+}
+
+static void virtio_blk_resize(void *opaque)
+{
+VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+
+/*
+ * virtio_notify_config() needs to acquire the BQL,
+ * so it can't be called from an iothread. Instead, schedule
+ * it to be run in the main context BH.
+ */
+aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev);
+}
+
+static void virtio_blk_ioeventfd_detach(VirtIOBlock *s)
+{
+VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+for (uint16_t i = 0; i < s->conf.num_queues; i++) {
+VirtQueue *vq = virtio_get_queue(vdev, i);
+virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]);
+}
+}
+
+static void virtio_blk_ioeventfd_attach(VirtIOBlock *s)
+{
+VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+for (uint16_t i = 0; i < s->conf.num_queues; i++) {
+VirtQueue *vq = virtio_get_queue(vdev, i);
+virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]);
+}
+}
+
+/* Suspend virtqueue ioeventfd processing during drain */
+static void virtio_blk_drained_begin(void *opaque)
+{
+VirtIOBlock *s = opaque;
+
+if (s->ioeventfd_started) {
+virtio_blk_ioeventfd_detach(s);
+}
+}
+
+/* Resume virtqueue ioeventfd processing after drain */
+static void virtio_blk_drained_end(void *opaque)
+{
+VirtIOBlock *s = opaque;
+
+if (s->ioeventfd_started) {
+virtio_blk_ioeventfd_attach(s);
+}
+}
+
+static const BlockDevOps virtio_block_ops = {
+.resize_cb = virtio_blk_resize,
+.drained_begin = virtio_blk_drained_begin,
+.drained_end   = virtio_blk_drained_end,
+};
+
static bool
validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
uint16_t num_queues, Error **errp)
@@ -1547,81 +1613,33 @@ 
validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
return true;
}

-static void virtio_resize_cb(void *opaque)
-{
-VirtIODevice *vdev = opaque;
-
-assert(qemu_get_current_aio_context() == qemu_get_aio_context());
-virtio_notify_config(vdev);
-}
-
-static void virtio_blk_resize(void *opaque)
-{
-VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
-
-/*
- * virtio_notify_config() needs to acquire the BQL,
- * so it can't be called from an iothread. Instead, schedule
- * it to be run in the main context BH.
- */
-aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev);
-}
-
-static void virtio_blk_ioeventfd_detach(VirtIOBlock *s)
-{
-VirtIODevice *vdev = VIRTIO_DEVICE(s);
-
-for (uint16_t i = 0; i < s->conf.num_queues; i++) {
-VirtQueue *vq = virtio_get_queue(vdev, i);
-virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]);
-}
-}
-
-static void virtio_blk_ioeventfd_attach(VirtIOBlock *s)
-{
-VirtIODevice *vdev = VIRTIO_DEVICE(s);
-
-for (uint16_t i = 0; i < s->conf.num_queues; i++) {
-VirtQueue *vq = virtio_get_queue(vdev, i);
-virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]);
-}
-}
-
-/* Suspend virtqueue ioeventfd processing during drain */
-static void virtio_blk_drained_begin(void *opaque)
-{
-VirtIOBlock *s = opaque;
-
-if (s->ioeventfd_started) {
-virtio_blk_ioeventfd_detach(s);
-}
-}
-
-/* Resume virtqueue ioeventfd processing after drain */
-static void virtio_blk_drained_end(void *opaque)
-{
-VirtIOBlock *s = opaque;
-
-if (s->ioeventfd_started) {
-virtio_blk_ioeventfd_attach(s);
-}
-}
-
-static const BlockDevOps virtio_block_ops = {
-.resize_cb = virtio_blk_resize,
-.drained_begin = virtio_blk_drained_begin,
-.drained_end   = virtio_blk_drained_end,
-};
-
-/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */
-static void
-apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list,
- 

Re: [PATCH 5/5] monitor: use aio_co_reschedule_self()

2024-02-05 Thread Manos Pitsidianakis

On Mon, 05 Feb 2024 19:26, Stefan Hajnoczi  wrote:

The aio_co_reschedule_self() API is designed to avoid the race
condition between scheduling the coroutine in another AioContext and
yielding.

The QMP dispatch code uses the open-coded version that appears
susceptible to the race condition at first glance:

 aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self());
 qemu_coroutine_yield();

The code is actually safe because the iohandler and qemu_aio_context
AioContext run under the Big QEMU Lock. Nevertheless, set a good example
and use aio_co_reschedule_self() so it's obvious that there is no race.

Suggested-by: Hanna Reitz 
Signed-off-by: Stefan Hajnoczi 
---
qapi/qmp-dispatch.c | 7 ++-
1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
index 176b549473..f3488afeef 100644
--- a/qapi/qmp-dispatch.c
+++ b/qapi/qmp-dispatch.c
@@ -212,8 +212,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList 
*cmds, QObject *requ
 * executing the command handler so that it can make progress if it
 * involves an AIO_WAIT_WHILE().
 */
-aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self());
-qemu_coroutine_yield();
+aio_co_reschedule_self(qemu_get_aio_context());
}

monitor_set_cur(qemu_coroutine_self(), cur_mon);
@@ -227,9 +226,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList 
*cmds, QObject *requ
 * Move back to iohandler_ctx so that nested event loops for
 * qemu_aio_context don't start new monitor commands.
 */
-aio_co_schedule(iohandler_get_aio_context(),
-qemu_coroutine_self());
-qemu_coroutine_yield();
+aio_co_reschedule_self(iohandler_get_aio_context());
}
} else {
   /*
--
2.43.0




Reviewed-by: Manos Pitsidianakis 



Re: [PATCH 2/5] virtio-blk: clarify that there is at least 1 virtqueue

2024-02-05 Thread Manos Pitsidianakis

On Mon, 05 Feb 2024 19:26, Stefan Hajnoczi  wrote:

It is not possible to instantiate a virtio-blk device with 0 virtqueues.
The following check is located in ->realize():

 if (!conf->num_queues) {
 error_setg(errp, "num-queues property must be larger than 0");
 return;
 }

Later on we access s->vq_aio_context[0] under the assumption that there
is at least one virtqueue. Hanna Czenczek  noted that
it would help to show that the array index is already valid.

Add an assertion to document that s->vq_aio_context[0] is always
safe...and catch future code changes that break this assumption.

Suggested-by: Hanna Czenczek 
Signed-off-by: Stefan Hajnoczi 
---
hw/block/virtio-blk.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index e8b37fd5f4..a0735a9bca 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1825,6 +1825,7 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev)
 * Try to change the AioContext so that block jobs and other operations can
 * co-locate their activity in the same AioContext. If it fails, nevermind.
 */
+assert(nvqs > 0); /* enforced during ->realize() */
r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0],
_err);
if (r < 0) {
--
2.43.0



Reviewed-by: Manos Pitsidianakis 



Re: [PATCH 3/5] virtio-blk: add vq_rq[] bounds check in virtio_blk_dma_restart_cb()

2024-02-05 Thread Manos Pitsidianakis

On Mon, 05 Feb 2024 19:26, Stefan Hajnoczi  wrote:

Hanna Czenczek  noted that the array index in
virtio_blk_dma_restart_cb() is not bounds-checked:

 g_autofree VirtIOBlockReq **vq_rq = g_new0(VirtIOBlockReq *, num_queues);
 ...
 while (rq) {
 VirtIOBlockReq *next = rq->next;
 uint16_t idx = virtio_get_queue_index(rq->vq);

 rq->next = vq_rq[idx];
^^

The code is correct because both rq->vq and vq_rq[] depend on
num_queues, but this is indirect and not 100% obvious. Add an assertion.


This sentence could be useful as an inline comment too.



Suggested-by: Hanna Czenczek 
Signed-off-by: Stefan Hajnoczi 
---
hw/block/virtio-blk.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index a0735a9bca..f3193f4b75 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1209,6 +1209,7 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool 
running,
VirtIOBlockReq *next = rq->next;
uint16_t idx = virtio_get_queue_index(rq->vq);

+assert(idx < num_queues);
rq->next = vq_rq[idx];
vq_rq[idx] = rq;
rq = next;
--
2.43.0



Reviewed-by: Manos Pitsidianakis 



Re: [PATCH 4/5] virtio-blk: declare VirtIOBlock::rq with a type

2024-02-05 Thread Manos Pitsidianakis

On Mon, 05 Feb 2024 19:26, Stefan Hajnoczi  wrote:

The VirtIOBlock::rq field has had the type void * since its introduction
in commit 869a5c6df19a ("Stop VM on error in virtio-blk. (Gleb
Natapov)").

Perhaps this was done to avoid the forward declaration of
VirtIOBlockReq.

Hanna Czenczek  pointed out the missing type. Specify
the actual type because there is no need to use void * here.

Suggested-by: Hanna Czenczek 
Signed-off-by: Stefan Hajnoczi 
---
include/hw/virtio/virtio-blk.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index 833a9a344f..5c14110c4b 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -55,7 +55,7 @@ struct VirtIOBlock {
VirtIODevice parent_obj;
BlockBackend *blk;
QemuMutex rq_lock;
-void *rq; /* protected by rq_lock */
+struct VirtIOBlockReq *rq; /* protected by rq_lock */
VirtIOBlkConf conf;
unsigned short sector_mask;
bool original_wce;
--
2.43.0



Reviewed-by: Manos Pitsidianakis 



Re: [PATCH] vdpa-dev: Fix initialisation order to restore VDUSE compatibility

2024-02-05 Thread Jason Wang
On Mon, Feb 5, 2024 at 6:51 PM Stefano Garzarella  wrote:
>
> On Fri, Feb 02, 2024 at 02:25:21PM +0100, Kevin Wolf wrote:
> >VDUSE requires that virtqueues are first enabled before the DRIVER_OK
> >status flag is set; with the current API of the kernel module, it is
> >impossible to enable the opposite order in our block export code because
> >userspace is not notified when a virtqueue is enabled.

Did this mean virtio-blk will enable a virtqueue after DRIVER_OK? Spec
is not clear about this and that's why we introduce
VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK.

>
> Yeah, IMHO the VDUSE protocol is missing a VDUSE_SET_VQ_READY message,

I think you meant when VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is
negotiated. If this is true, it seems a little more complicated, for
example the get_backend_features needs to be forwarded to the userspace?
This seems suboptimal to implement this in the spec first and then we
can leverage the features. Or we can have another parameter for the
ioctl that creates the vduse device.

> I'll start another thread about that, but in the meantime I agree that
> we should fix QEMU since we need to work properly with old kernels as
> well.
>
> >
> >This requirement also matches the normal initialisation order as done by
> >the generic vhost code in QEMU. However, commit 6c482547 accidentally
> >changed the order for vdpa-dev and broke access to VDUSE devices with
> >this.
> >
> >This changes vdpa-dev to use the normal order again and use the standard
> >vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
> >used with vdpa-dev again after this fix.
>
> I like this approach and the patch LGTM, but I'm a bit worried about
> this function in hw/net/vhost_net.c:
>
>  int vhost_set_vring_enable(NetClientState *nc, int enable)
>  {
>  VHostNetState *net = get_vhost_net(nc);
>  const VhostOps *vhost_ops = net->dev.vhost_ops;
>
>  nc->vring_enable = enable;
>
>  if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
>  return vhost_ops->vhost_set_vring_enable(>dev, enable);
>  }
>
>  return 0;
>  }
>
> @Eugenio, @Jason, should we change some things there if vhost-vdpa
> implements the vhost_set_vring_enable callback?

Eugenio may know more, I remember we need to enable cvq first for
shadow virtqueue to restore some states.

>
> Do you remember why we didn't implement it from the beginning?

It seems the vrings parameter is introduced after vhost-vdpa is implemented.

Thanks

>
> Thanks,
> Stefano
>
> >
> >Cc: qemu-sta...@nongnu.org
> >Fixes: 6c4825476a4351530bcac17abab72295b75ffe98
> >Signed-off-by: Kevin Wolf 
> >---
> > hw/virtio/vdpa-dev.c   |  5 +
> > hw/virtio/vhost-vdpa.c | 17 +
> > 2 files changed, 18 insertions(+), 4 deletions(-)
> >
> >diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> >index eb9ecea83b..13e87f06f6 100644
> >--- a/hw/virtio/vdpa-dev.c
> >+++ b/hw/virtio/vdpa-dev.c
> >@@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, 
> >Error **errp)
> >
> > s->dev.acked_features = vdev->guest_features;
> >
> >-ret = vhost_dev_start(>dev, vdev, false);
> >+ret = vhost_dev_start(>dev, vdev, true);
> > if (ret < 0) {
> > error_setg_errno(errp, -ret, "Error starting vhost");
> > goto err_guest_notifiers;
> > }
> >-for (i = 0; i < s->dev.nvqs; ++i) {
> >-vhost_vdpa_set_vring_ready(>vdpa, i);
> >-}
> > s->started = true;
> >
> > /*
> >diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> >index 3a43beb312..c4574d56c5 100644
> >--- a/hw/virtio/vhost-vdpa.c
> >+++ b/hw/virtio/vhost-vdpa.c
> >@@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, 
> >unsigned idx)
> > return r;
> > }
> >
> >+static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
> >+{
> >+struct vhost_vdpa *v = dev->opaque;
> >+unsigned int i;
> >+int ret;
> >+
> >+for (i = 0; i < dev->nvqs; ++i) {
> >+ret = vhost_vdpa_set_vring_ready(v, i);
> >+if (ret < 0) {
> >+return ret;
> >+}
> >+}
> >+
> >+return 0;
> >+}
> >+
> > static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
> >int fd)
> > {
> >@@ -1524,6 +1540,7 @@ const VhostOps vdpa_ops = {
> > .vhost_set_features = vhost_vdpa_set_features,
> > .vhost_reset_device = vhost_vdpa_reset_device,
> > .vhost_get_vq_index = vhost_vdpa_get_vq_index,
> >+.vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
> > .vhost_get_config  = vhost_vdpa_get_config,
> > .vhost_set_config = vhost_vdpa_set_config,
> > .vhost_requires_shm_log = NULL,
> >--
> >2.43.0
> >
>




Re: [PATCH] tests/cdrom-test: Add cdrom test for LoongArch virt machine

2024-02-05 Thread maobibo

Hi Thomas,

On 2024/2/5 下午3:47, Thomas Huth wrote:

On 05/02/2024 03.13, Bibo Mao wrote:

The cdrom test skips to execute on LoongArch system with command
"make check"


Are you sure the test is marked with "skip"? ... it should at least test 
with the "none" machine...?


I checked again: the cdrom test case passes when run with the "none" machine.
The root cause is that the xorriso rpm package is missing, so the test is skipped.


Regards
Bibo Mao


this patch enables cdrom test for LoongArch virt
machine platform.

With this patch, cdrom test passes to run on LoongArch virt
machine type.

Signed-off-by: Bibo Mao 
---
  tests/qtest/cdrom-test.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c
index 0945383789..c8b97d8d9a 100644
--- a/tests/qtest/cdrom-test.c
+++ b/tests/qtest/cdrom-test.c
@@ -271,6 +271,9 @@ int main(int argc, char **argv)
  const char *virtmachine[] = { "virt", NULL };
  add_cdrom_param_tests(virtmachine);
  }
+    } else if (g_str_equal(arch, "loongarch64")) {
+    const char *virtmachine[] = { "virt", NULL };
+    add_cdrom_param_tests(virtmachine);
  } else {
  const char *nonemachine[] = { "none", NULL };
  add_cdrom_param_tests(nonemachine);


Anyway, using the virt machine is certainly better than the "none" 
machine, so:

Acked-by: Thomas Huth 






Re: [PATCH] tests/cdrom-test: Add cdrom test for LoongArch virt machine

2024-02-05 Thread maobibo

Hi Philippe,

On 2024/2/5 下午8:58, Philippe Mathieu-Daudé wrote:

Hi Bibo,

On 5/2/24 03:13, Bibo Mao wrote:

The cdrom test skips to execute on LoongArch system with command
"make check", this patch enables cdrom test for LoongArch virt
machine platform.

With this patch, cdrom test passes to run on LoongArch virt
machine type.

Signed-off-by: Bibo Mao 
---
  tests/qtest/cdrom-test.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c
index 0945383789..c8b97d8d9a 100644
--- a/tests/qtest/cdrom-test.c
+++ b/tests/qtest/cdrom-test.c
@@ -271,6 +271,9 @@ int main(int argc, char **argv)
  const char *virtmachine[] = { "virt", NULL };
  add_cdrom_param_tests(virtmachine);
  }
+    } else if (g_str_equal(arch, "loongarch64")) {
+    const char *virtmachine[] = { "virt", NULL };
+    add_cdrom_param_tests(virtmachine);


What is the default device used, virtio-blk-pci?


yes, it is. For virt machine type, the default type for block device is
virtio interface, and it is defined at function loongarch_class_init().
   mc->block_default_type = IF_VIRTIO

Regards
Bibo Mao




[PATCH 3/5] virtio-blk: add vq_rq[] bounds check in virtio_blk_dma_restart_cb()

2024-02-05 Thread Stefan Hajnoczi
Hanna Czenczek  noted that the array index in
virtio_blk_dma_restart_cb() is not bounds-checked:

  g_autofree VirtIOBlockReq **vq_rq = g_new0(VirtIOBlockReq *, num_queues);
  ...
  while (rq) {
  VirtIOBlockReq *next = rq->next;
  uint16_t idx = virtio_get_queue_index(rq->vq);

  rq->next = vq_rq[idx];
 ^^

The code is correct because both rq->vq and vq_rq[] depend on
num_queues, but this is indirect and not 100% obvious. Add an assertion.

Suggested-by: Hanna Czenczek 
Signed-off-by: Stefan Hajnoczi 
---
 hw/block/virtio-blk.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index a0735a9bca..f3193f4b75 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1209,6 +1209,7 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool 
running,
 VirtIOBlockReq *next = rq->next;
 uint16_t idx = virtio_get_queue_index(rq->vq);
 
+assert(idx < num_queues);
 rq->next = vq_rq[idx];
 vq_rq[idx] = rq;
 rq = next;
-- 
2.43.0




[PATCH 4/5] virtio-blk: declare VirtIOBlock::rq with a type

2024-02-05 Thread Stefan Hajnoczi
The VirtIOBlock::rq field has had the type void * since its introduction
in commit 869a5c6df19a ("Stop VM on error in virtio-blk. (Gleb
Natapov)").

Perhaps this was done to avoid the forward declaration of
VirtIOBlockReq.

Hanna Czenczek  pointed out the missing type. Specify
the actual type because there is no need to use void * here.

Suggested-by: Hanna Czenczek 
Signed-off-by: Stefan Hajnoczi 
---
 include/hw/virtio/virtio-blk.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index 833a9a344f..5c14110c4b 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -55,7 +55,7 @@ struct VirtIOBlock {
 VirtIODevice parent_obj;
 BlockBackend *blk;
 QemuMutex rq_lock;
-void *rq; /* protected by rq_lock */
+struct VirtIOBlockReq *rq; /* protected by rq_lock */
 VirtIOBlkConf conf;
 unsigned short sector_mask;
 bool original_wce;
-- 
2.43.0




[PATCH 0/5] virtio-blk: iothread-vq-mapping cleanups

2024-02-05 Thread Stefan Hajnoczi
Hanna reviewed the iothread-vq-mapping patches after they were applied to
qemu.git. This series consists of code cleanups that Hanna identified.

There are no functional changes or bug fixes that need to be backported to the
stable tree here, but it may make sense to backport them in the future to avoid
conflicts.

Stefan Hajnoczi (5):
  virtio-blk: enforce iothread-vq-mapping validation
  virtio-blk: clarify that there is at least 1 virtqueue
  virtio-blk: add vq_rq[] bounds check in virtio_blk_dma_restart_cb()
  virtio-blk: declare VirtIOBlock::rq with a type
  monitor: use aio_co_reschedule_self()

 include/hw/virtio/virtio-blk.h |   2 +-
 hw/block/virtio-blk.c  | 194 ++---
 qapi/qmp-dispatch.c|   7 +-
 3 files changed, 112 insertions(+), 91 deletions(-)

-- 
2.43.0




[PATCH 2/5] virtio-blk: clarify that there is at least 1 virtqueue

2024-02-05 Thread Stefan Hajnoczi
It is not possible to instantiate a virtio-blk device with 0 virtqueues.
The following check is located in ->realize():

  if (!conf->num_queues) {
  error_setg(errp, "num-queues property must be larger than 0");
  return;
  }

Later on we access s->vq_aio_context[0] under the assumption that there
is at least one virtqueue. Hanna Czenczek  noted that
it would help to show that the array index is already valid.

Add an assertion to document that s->vq_aio_context[0] is always
safe...and catch future code changes that break this assumption.

Suggested-by: Hanna Czenczek 
Signed-off-by: Stefan Hajnoczi 
---
 hw/block/virtio-blk.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index e8b37fd5f4..a0735a9bca 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1825,6 +1825,7 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev)
  * Try to change the AioContext so that block jobs and other operations can
  * co-locate their activity in the same AioContext. If it fails, nevermind.
  */
+assert(nvqs > 0); /* enforced during ->realize() */
 r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0],
 _err);
 if (r < 0) {
-- 
2.43.0




[PATCH 1/5] virtio-blk: enforce iothread-vq-mapping validation

2024-02-05 Thread Stefan Hajnoczi
Hanna Czenczek  noticed that the safety of
`vq_aio_context[vq->value] = ctx;` with user-defined vq->value inputs is
not obvious.

The code is structured in validate() + apply() steps so input validation
is there, but it happens way earlier and there is nothing that
guarantees apply() can only be called with validated inputs.

This patch moves the validate() call inside the apply() function so
validation is guaranteed. I also added the bounds checking assertion
that Hanna suggested.

Signed-off-by: Stefan Hajnoczi 
---
 hw/block/virtio-blk.c | 192 +++---
 1 file changed, 107 insertions(+), 85 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 227d83569f..e8b37fd5f4 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1485,6 +1485,72 @@ static int virtio_blk_load_device(VirtIODevice *vdev, 
QEMUFile *f,
 return 0;
 }
 
+static void virtio_resize_cb(void *opaque)
+{
+VirtIODevice *vdev = opaque;
+
+assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+virtio_notify_config(vdev);
+}
+
+static void virtio_blk_resize(void *opaque)
+{
+VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+
+/*
+ * virtio_notify_config() needs to acquire the BQL,
+ * so it can't be called from an iothread. Instead, schedule
+ * it to be run in the main context BH.
+ */
+aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev);
+}
+
+static void virtio_blk_ioeventfd_detach(VirtIOBlock *s)
+{
+VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+for (uint16_t i = 0; i < s->conf.num_queues; i++) {
+VirtQueue *vq = virtio_get_queue(vdev, i);
+virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]);
+}
+}
+
+static void virtio_blk_ioeventfd_attach(VirtIOBlock *s)
+{
+VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+for (uint16_t i = 0; i < s->conf.num_queues; i++) {
+VirtQueue *vq = virtio_get_queue(vdev, i);
+virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]);
+}
+}
+
+/* Suspend virtqueue ioeventfd processing during drain */
+static void virtio_blk_drained_begin(void *opaque)
+{
+VirtIOBlock *s = opaque;
+
+if (s->ioeventfd_started) {
+virtio_blk_ioeventfd_detach(s);
+}
+}
+
+/* Resume virtqueue ioeventfd processing after drain */
+static void virtio_blk_drained_end(void *opaque)
+{
+VirtIOBlock *s = opaque;
+
+if (s->ioeventfd_started) {
+virtio_blk_ioeventfd_attach(s);
+}
+}
+
+static const BlockDevOps virtio_block_ops = {
+.resize_cb = virtio_blk_resize,
+.drained_begin = virtio_blk_drained_begin,
+.drained_end   = virtio_blk_drained_end,
+};
+
 static bool
 validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
 uint16_t num_queues, Error **errp)
@@ -1547,81 +1613,33 @@ 
validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
 return true;
 }
 
-static void virtio_resize_cb(void *opaque)
-{
-VirtIODevice *vdev = opaque;
-
-assert(qemu_get_current_aio_context() == qemu_get_aio_context());
-virtio_notify_config(vdev);
-}
-
-static void virtio_blk_resize(void *opaque)
-{
-VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
-
-/*
- * virtio_notify_config() needs to acquire the BQL,
- * so it can't be called from an iothread. Instead, schedule
- * it to be run in the main context BH.
- */
-aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev);
-}
-
-static void virtio_blk_ioeventfd_detach(VirtIOBlock *s)
-{
-VirtIODevice *vdev = VIRTIO_DEVICE(s);
-
-for (uint16_t i = 0; i < s->conf.num_queues; i++) {
-VirtQueue *vq = virtio_get_queue(vdev, i);
-virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]);
-}
-}
-
-static void virtio_blk_ioeventfd_attach(VirtIOBlock *s)
-{
-VirtIODevice *vdev = VIRTIO_DEVICE(s);
-
-for (uint16_t i = 0; i < s->conf.num_queues; i++) {
-VirtQueue *vq = virtio_get_queue(vdev, i);
-virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]);
-}
-}
-
-/* Suspend virtqueue ioeventfd processing during drain */
-static void virtio_blk_drained_begin(void *opaque)
-{
-VirtIOBlock *s = opaque;
-
-if (s->ioeventfd_started) {
-virtio_blk_ioeventfd_detach(s);
-}
-}
-
-/* Resume virtqueue ioeventfd processing after drain */
-static void virtio_blk_drained_end(void *opaque)
-{
-VirtIOBlock *s = opaque;
-
-if (s->ioeventfd_started) {
-virtio_blk_ioeventfd_attach(s);
-}
-}
-
-static const BlockDevOps virtio_block_ops = {
-.resize_cb = virtio_blk_resize,
-.drained_begin = virtio_blk_drained_begin,
-.drained_end   = virtio_blk_drained_end,
-};
-
-/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */
-static void
-apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list,
- AioContext **vq_aio_context, uint16_t 

[PATCH 5/5] monitor: use aio_co_reschedule_self()

2024-02-05 Thread Stefan Hajnoczi
The aio_co_reschedule_self() API is designed to avoid the race
condition between scheduling the coroutine in another AioContext and
yielding.

The QMP dispatch code uses the open-coded version that appears
susceptible to the race condition at first glance:

  aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self());
  qemu_coroutine_yield();

The code is actually safe because the iohandler and qemu_aio_context
AioContext run under the Big QEMU Lock. Nevertheless, set a good example
and use aio_co_reschedule_self() so it's obvious that there is no race.

Suggested-by: Hanna Reitz 
Signed-off-by: Stefan Hajnoczi 
---
 qapi/qmp-dispatch.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
index 176b549473..f3488afeef 100644
--- a/qapi/qmp-dispatch.c
+++ b/qapi/qmp-dispatch.c
@@ -212,8 +212,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList 
*cmds, QObject *requ
  * executing the command handler so that it can make progress if it
  * involves an AIO_WAIT_WHILE().
  */
-aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self());
-qemu_coroutine_yield();
+aio_co_reschedule_self(qemu_get_aio_context());
 }
 
 monitor_set_cur(qemu_coroutine_self(), cur_mon);
@@ -227,9 +226,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList 
*cmds, QObject *requ
  * Move back to iohandler_ctx so that nested event loops for
  * qemu_aio_context don't start new monitor commands.
  */
-aio_co_schedule(iohandler_get_aio_context(),
-qemu_coroutine_self());
-qemu_coroutine_yield();
+aio_co_reschedule_self(iohandler_get_aio_context());
 }
 } else {
/*
-- 
2.43.0




Re: [PATCH] iotests: give tempdir an identifying name

2024-02-05 Thread Michael Tokarev

05.02.2024 18:51, Daniel P. Berrangé wrote:

If something goes wrong causing the iotests not to cleanup their
temporary directory, it is useful if the dir had an identifying
name to show what is to blame.

Signed-off-by: Daniel P. Berrangé 


Reviewed-by: Michael Tokarev 

Thank you again for the quick good work!

/mjt


  tests/qemu-iotests/testenv.py | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py
index 3ff38f2661..588f30a4f1 100644
--- a/tests/qemu-iotests/testenv.py
+++ b/tests/qemu-iotests/testenv.py
@@ -126,7 +126,7 @@ def init_directories(self) -> None:
  self.tmp_sock_dir = False
  Path(self.sock_dir).mkdir(parents=True, exist_ok=True)
  except KeyError:
-self.sock_dir = tempfile.mkdtemp()
+self.sock_dir = tempfile.mkdtemp(prefix="qemu-iotests-")
  self.tmp_sock_dir = True
  
  self.sample_img_dir = os.getenv('SAMPLE_IMG_DIR',





Re: [PATCH] iotests: fix leak of tmpdir in dry-run mode

2024-02-05 Thread Michael Tokarev

05.02.2024 18:40, Daniel P. Berrangé :

Creating an instance of the 'TestEnv' class will create a temporary
directory. This dir is only deleted, however, in the __exit__ handler
invoked by a context manager.

In dry-run mode, we don't use the TestEnv via a context manager, so
were leaking the temporary directory. Since meson invokes 'check'
5 times on each configure run, developers /tmp was filling up with
empty temporary directories.

Signed-off-by: Daniel P. Berrangé 
---
  tests/qemu-iotests/check | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index f2e9d27dcf..56d88ca423 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -184,7 +184,8 @@ if __name__ == '__main__':
  sys.exit(str(e))
  
  if args.dry_run:

-print('\n'.join([os.path.basename(t) for t in tests]))
+with env:
+print('\n'.join([os.path.basename(t) for t in tests]))
  else:
  with TestRunner(env, tap=args.tap,
  color=args.color) as tr:


Reviewed-by: Michael Tokarev 

(with my limited understanding of this code)

Thanks!

/mjt



[PATCH] iotests: give tempdir an identifying name

2024-02-05 Thread Daniel P . Berrangé
If something goes wrong causing the iotests not to cleanup their
temporary directory, it is useful if the dir had an identifying
name to show what is to blame.

Signed-off-by: Daniel P. Berrangé 
---
 tests/qemu-iotests/testenv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py
index 3ff38f2661..588f30a4f1 100644
--- a/tests/qemu-iotests/testenv.py
+++ b/tests/qemu-iotests/testenv.py
@@ -126,7 +126,7 @@ def init_directories(self) -> None:
 self.tmp_sock_dir = False
 Path(self.sock_dir).mkdir(parents=True, exist_ok=True)
 except KeyError:
-self.sock_dir = tempfile.mkdtemp()
+self.sock_dir = tempfile.mkdtemp(prefix="qemu-iotests-")
 self.tmp_sock_dir = True
 
 self.sample_img_dir = os.getenv('SAMPLE_IMG_DIR',
-- 
2.43.0




Re: [PATCH] iotests: fix leak of tmpdir in dry-run mode

2024-02-05 Thread Peter Maydell
On Mon, 5 Feb 2024 at 15:41, Daniel P. Berrangé  wrote:
>
> Creating an instance of the 'TestEnv' class will create a temporary
> directory. This dir is only deleted, however, in the __exit__ handler
> invoked by a context manager.
>
> In dry-run mode, we don't use the TestEnv via a context manager, so
> were leaking the temporary directory. Since meson invokes 'check'
> 5 times on each configure run, developers' /tmp was filling up with
> empty temporary directories.
>
> Signed-off-by: Daniel P. Berrangé 

Could we also arrange for the temp directory to be created
with a name that makes it easy to identify what has created it?
Very generic names like "/tmp/tmp" are very hard to
pin down to the code that created them.

thanks
-- PMM



[PATCH] iotests: fix leak of tmpdir in dry-run mode

2024-02-05 Thread Daniel P . Berrangé
Creating an instance of the 'TestEnv' class will create a temporary
directory. This dir is only deleted, however, in the __exit__ handler
invoked by a context manager.

In dry-run mode, we don't use the TestEnv via a context manager, so
were leaking the temporary directory. Since meson invokes 'check'
5 times on each configure run, developers' /tmp was filling up with
empty temporary directories.

Signed-off-by: Daniel P. Berrangé 
---
 tests/qemu-iotests/check | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index f2e9d27dcf..56d88ca423 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -184,7 +184,8 @@ if __name__ == '__main__':
 sys.exit(str(e))
 
 if args.dry_run:
-print('\n'.join([os.path.basename(t) for t in tests]))
+with env:
+print('\n'.join([os.path.basename(t) for t in tests]))
 else:
 with TestRunner(env, tap=args.tap,
 color=args.color) as tr:
-- 
2.43.0




Re: [PATCH] vdpa-dev: Fix initialisation order to restore VDUSE compatibility

2024-02-05 Thread Kevin Wolf
Am 05.02.2024 um 13:22 hat Eugenio Perez Martin geschrieben:
> On Fri, Feb 2, 2024 at 2:25 PM Kevin Wolf  wrote:
> >
> > VDUSE requires that virtqueues are first enabled before the DRIVER_OK
> > status flag is set; with the current API of the kernel module, it is
> > impossible to enable the opposite order in our block export code because
> > userspace is not notified when a virtqueue is enabled.
> >
> > This requirement also matches the normal initialisation order as done by
> > the generic vhost code in QEMU. However, commit 6c482547 accidentally
> > changed the order for vdpa-dev and broke access to VDUSE devices with
> > this.
> >
> > This changes vdpa-dev to use the normal order again and use the standard
> > vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
> > used with vdpa-dev again after this fix.
> >
> > Cc: qemu-sta...@nongnu.org
> > Fixes: 6c4825476a4351530bcac17abab72295b75ffe98
> > Signed-off-by: Kevin Wolf 
> > ---
> >  hw/virtio/vdpa-dev.c   |  5 +
> >  hw/virtio/vhost-vdpa.c | 17 +
> >  2 files changed, 18 insertions(+), 4 deletions(-)
> >
> > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> > index eb9ecea83b..13e87f06f6 100644
> > --- a/hw/virtio/vdpa-dev.c
> > +++ b/hw/virtio/vdpa-dev.c
> > @@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice 
> > *vdev, Error **errp)
> >
> >  s->dev.acked_features = vdev->guest_features;
> >
> > -ret = vhost_dev_start(>dev, vdev, false);
> > +ret = vhost_dev_start(>dev, vdev, true);
> >  if (ret < 0) {
> >  error_setg_errno(errp, -ret, "Error starting vhost");
> >  goto err_guest_notifiers;
> >  }
> > -for (i = 0; i < s->dev.nvqs; ++i) {
> > -vhost_vdpa_set_vring_ready(>vdpa, i);
> > -}
> >  s->started = true;
> >
> >  /*
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index 3a43beb312..c4574d56c5 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, 
> > unsigned idx)
> >  return r;
> >  }
> >
> > +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
> > +{
> > +struct vhost_vdpa *v = dev->opaque;
> > +unsigned int i;
> > +int ret;
> > +
> > +for (i = 0; i < dev->nvqs; ++i) {
> > +ret = vhost_vdpa_set_vring_ready(v, i);
> > +if (ret < 0) {
> > +return ret;
> > +}
> > +}
> > +
> > +return 0;
> > +}
> > +
> >  static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
> > int fd)
> >  {
> > @@ -1524,6 +1540,7 @@ const VhostOps vdpa_ops = {
> >  .vhost_set_features = vhost_vdpa_set_features,
> >  .vhost_reset_device = vhost_vdpa_reset_device,
> >  .vhost_get_vq_index = vhost_vdpa_get_vq_index,
> > +.vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
> >  .vhost_get_config  = vhost_vdpa_get_config,
> >  .vhost_set_config = vhost_vdpa_set_config,
> >  .vhost_requires_shm_log = NULL,
> 
> vhost-vdpa net enables CVQ before dataplane ones to configure all the
> device in the destination of a live migration. To go back again to
> this callback would cause the device to enable the dataplane before
> virtqueues are configured again.

Not that it makes a difference, but I don't think it would actually be
going back. Even before your commit 6c482547, we were not making use of
the generic callback but just called the function in a slightly
different place (but less different than after commit 6c482547).

But anyway... Why don't the other vhost backends need the same for
vhost-net then? Do they just not support live migration?

I don't know the code well enough to say where the problem is, but if
vhost-vdpa networking code relies on the usual vhost operations not
being implemented and bypasses VhostOps to replace it, that sounds like
a design problem to me. Maybe VhostOps needs a new operation to enable
just a single virtqueue that can be used by the networking code instead?

> How does VDUSE userspace knows how many queues should enable? Can't
> the kernel perform the necessary actions after DRIVER_OK, like
> configuring the kick etc?

Not sure if I understand the question. The vdpa kernel interface always
enables individual queues, so the VDUSE userspace will enable whatever
queues it was asked to enable. The only restriction is that the queues
need to be enabled before setting DRIVER_OK.

The interface that enables all virtqueues at once seems to be just
.vhost_set_vring_enable in QEMU.

Kevin




Re: NVME hotplug support ?

2024-02-05 Thread Damien Hedde



On 1/29/24 16:35, Hannes Reinecke wrote:

On 1/29/24 14:13, Damien Hedde wrote:



On 1/24/24 08:47, Hannes Reinecke wrote:

On 1/24/24 07:52, Philippe Mathieu-Daudé wrote:

Hi Hannes,

[+Markus as QOM/QDev rubber duck]

On 23/1/24 13:40, Hannes Reinecke wrote:

On 1/23/24 11:59, Damien Hedde wrote:

Hi all,

We are currently looking into hotplugging nvme devices and it is 
currently not possible:

When nvme was introduced 2 years ago, the feature was disabled.

commit cc6fb6bc506e6c47ed604fcb7b7413dff0b7d845
Author: Klaus Jensen
Date:   Tue Jul 6 10:48:40 2021 +0200

    hw/nvme: mark nvme-subsys non-hotpluggable
    We currently lack the infrastructure to handle subsystem 
hotplugging, so

    disable it.


Do someone know what's lacking or anyone have some tips/idea of 
what we should develop to add the support ?


Problem is that the object model is messed up. In qemu namespaces 
are attached to controllers, which in turn are children of the PCI 
device.

There are subsystems, but these just reference the controller.

So if you hotunplug the PCI device you detach/destroy the 
controller and detach the namespaces from the controller.
But if you hotplug the PCI device again the NVMe controller will be 
attached to the PCI device, but the namespace are still be detached.


Klaus said he was going to fix that, and I dimly remember some patches
floating around. But apparently it never went anywhere.

Fundamental problem is that the NVMe hierarchy as per spec is 
incompatible with the qemu object model; qemu requires a strict

tree model where every object has exactly _one_ parent.


The modelling problem is not clear to me.
Do you have an example of how the NVMe hierarchy should be?


Sure.

As per NVMe spec we have this hierarchy:

  --->  subsys ---
 |    |
 |    V
controller  namespaces

There can be several controllers, and several
namespaces.
The initiator (ie the linux 'nvme' driver) connects
to a controller, queries the subsystem for the attached
namespaces, and presents each namespace as a block device.

For Qemu we have the problem that every device _must_ be
a direct descendant of the parent (expressed by the fact
that each 'parent' object is embedded in the device object).

So if we were to present a NVMe PCI device, the controller
must be derived from the PCI device:

pci -> controller

but now we have to express the NVMe hierarchy, too:

pci -> ctrl1 -> subsys1 -> namespace1

which actually works.
We can easily attach several namespaces:

pci -> ctrl1 ->subsys1 -> namespace2

For a single controller and a single subsystem.
However, as mentioned above, there can be _several_
controllers attached to the same subsystem.
So we can express the second controller:

pci -> ctrl2

but we cannot attach the controller to 'subsys1'
as then 'subsys1' would need to be derived from
'ctrl2', and not (as it is now) from 'ctrl1'.

The most logical step would be to have 'subsystems'
their own entity, independent of any controllers.
But then the block devices (which are derived from
the namespaces) could not be traced back
to the PCI device, and a PCI hotplug would not
'automatically' disconnect the nvme block devices.

Plus the subsystem would be independent from the NVMe
PCI devices, so you could have a subsystem with
no controllers attached. And one would wonder who
should be responsible for cleaning up that.



Thanks for the details !

My use case is the simple one with no nvme subsystem/namespaces:
- hotplug a pci nvme device (nvme controller) as in the following CLI 
(which automatically put the drive into a default namespace)


./qemu-system-aarch64 -nographic -M virt \
    -drive file=nvme0.disk,if=none,id=nvme-drive0 \
    -device nvme,serial=nvme0,id=nvmedev0,drive=nvme-drive0

In the simple tree approach where subsystems and namespaces are not 
shared by controllers. We could delete the whole nvme hiearchy under 
the controller while unplugging it ?


In your first message, you said
  > So if you hotunplug the PCI device you detach/destroy the controller
  > and detach the namespaces from the controller.
  > But if you hotplug the PCI device again the NVMe controller will be
  > attached to the PCI device, but the namespace are still be detached.

Do you mean that if we unplug the pci device we HAVE to keep some nvme 
objects so that if we plug the device back we can recover them ?
Or just that it's hard to unplug nvme objects if they are not real qom 
children of pci device ?


Key point for trying on PCI hotplug with qemu is to attach the PCI 
device to it's own PCI root port. Cf the mail from Klaus Jensen for 
details.


Cheers,

Hannes


Thanks a lot from both of you. I missed that.

Damien








Re: [PATCH] vdpa-dev: Fix initialisation order to restore VDUSE compatibility

2024-02-05 Thread Kevin Wolf
Am 02.02.2024 um 14:25 hat Kevin Wolf geschrieben:
> VDUSE requires that virtqueues are first enabled before the DRIVER_OK
> status flag is set; with the current API of the kernel module, it is
> impossible to enable the opposite order in our block export code because
> userspace is not notified when a virtqueue is enabled.
> 
> This requirement also matches the normal initialisation order as done by
> the generic vhost code in QEMU. However, commit 6c482547 accidentally
> changed the order for vdpa-dev and broke access to VDUSE devices with
> this.
> 
> This changes vdpa-dev to use the normal order again and use the standard
> vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
> used with vdpa-dev again after this fix.
> 
> Cc: qemu-sta...@nongnu.org
> Fixes: 6c4825476a4351530bcac17abab72295b75ffe98
> Signed-off-by: Kevin Wolf 
> ---
>  hw/virtio/vdpa-dev.c   |  5 +
>  hw/virtio/vhost-vdpa.c | 17 +
>  2 files changed, 18 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> index eb9ecea83b..13e87f06f6 100644
> --- a/hw/virtio/vdpa-dev.c
> +++ b/hw/virtio/vdpa-dev.c
> @@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, 
> Error **errp)
>  
>  s->dev.acked_features = vdev->guest_features;
>  
> -ret = vhost_dev_start(>dev, vdev, false);
> +ret = vhost_dev_start(>dev, vdev, true);
>  if (ret < 0) {
>  error_setg_errno(errp, -ret, "Error starting vhost");
>  goto err_guest_notifiers;
>  }
> -for (i = 0; i < s->dev.nvqs; ++i) {
> -vhost_vdpa_set_vring_ready(>vdpa, i);
> -}
>  s->started = true;
>  
>  /*
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 3a43beb312..c4574d56c5 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, 
> unsigned idx)
>  return r;
>  }
>  
> +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
> +{
> +struct vhost_vdpa *v = dev->opaque;
> +unsigned int i;
> +int ret;
> +
> +for (i = 0; i < dev->nvqs; ++i) {
> +ret = vhost_vdpa_set_vring_ready(v, i);
> +if (ret < 0) {
> +return ret;
> +}
> +}
> +
> +return 0;
> +}

Oops, this forgets to actually use the @enable parameter, and always
enables the queue even if the caller wanted to disable it.

I'll fix this in a v2, but I'd first like to see if something has to
change to address Stefano's concern, too.

Kevin




Re: [PATCH] tests/cdrom-test: Add cdrom test for LoongArch virt machine

2024-02-05 Thread Philippe Mathieu-Daudé

Hi Bibo,

On 5/2/24 03:13, Bibo Mao wrote:

The cdrom test skips to execute on LoongArch system with command
"make check", this patch enables cdrom test for LoongArch virt
machine platform.

With this patch, cdrom test passes to run on LoongArch virt
machine type.

Signed-off-by: Bibo Mao 
---
  tests/qtest/cdrom-test.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c
index 0945383789..c8b97d8d9a 100644
--- a/tests/qtest/cdrom-test.c
+++ b/tests/qtest/cdrom-test.c
@@ -271,6 +271,9 @@ int main(int argc, char **argv)
  const char *virtmachine[] = { "virt", NULL };
  add_cdrom_param_tests(virtmachine);
  }
+} else if (g_str_equal(arch, "loongarch64")) {
+const char *virtmachine[] = { "virt", NULL };
+add_cdrom_param_tests(virtmachine);


What is the default device used, virtio-blk-pci?




Re: [PATCH] vdpa-dev: Fix initialisation order to restore VDUSE compatibility

2024-02-05 Thread Eugenio Perez Martin
On Fri, Feb 2, 2024 at 2:25 PM Kevin Wolf  wrote:
>
> VDUSE requires that virtqueues are first enabled before the DRIVER_OK
> status flag is set; with the current API of the kernel module, it is
> impossible to enable the opposite order in our block export code because
> userspace is not notified when a virtqueue is enabled.
>
> This requirement also matches the normal initialisation order as done by
> the generic vhost code in QEMU. However, commit 6c482547 accidentally
> changed the order for vdpa-dev and broke access to VDUSE devices with
> this.
>
> This changes vdpa-dev to use the normal order again and use the standard
> vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
> used with vdpa-dev again after this fix.
>
> Cc: qemu-sta...@nongnu.org
> Fixes: 6c4825476a4351530bcac17abab72295b75ffe98
> Signed-off-by: Kevin Wolf 
> ---
>  hw/virtio/vdpa-dev.c   |  5 +
>  hw/virtio/vhost-vdpa.c | 17 +
>  2 files changed, 18 insertions(+), 4 deletions(-)
>
> diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> index eb9ecea83b..13e87f06f6 100644
> --- a/hw/virtio/vdpa-dev.c
> +++ b/hw/virtio/vdpa-dev.c
> @@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, 
> Error **errp)
>
>  s->dev.acked_features = vdev->guest_features;
>
> -ret = vhost_dev_start(>dev, vdev, false);
> +ret = vhost_dev_start(>dev, vdev, true);
>  if (ret < 0) {
>  error_setg_errno(errp, -ret, "Error starting vhost");
>  goto err_guest_notifiers;
>  }
> -for (i = 0; i < s->dev.nvqs; ++i) {
> -vhost_vdpa_set_vring_ready(>vdpa, i);
> -}
>  s->started = true;
>
>  /*
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 3a43beb312..c4574d56c5 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, 
> unsigned idx)
>  return r;
>  }
>
> +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
> +{
> +struct vhost_vdpa *v = dev->opaque;
> +unsigned int i;
> +int ret;
> +
> +for (i = 0; i < dev->nvqs; ++i) {
> +ret = vhost_vdpa_set_vring_ready(v, i);
> +if (ret < 0) {
> +return ret;
> +}
> +}
> +
> +return 0;
> +}
> +
>  static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
> int fd)
>  {
> @@ -1524,6 +1540,7 @@ const VhostOps vdpa_ops = {
>  .vhost_set_features = vhost_vdpa_set_features,
>  .vhost_reset_device = vhost_vdpa_reset_device,
>  .vhost_get_vq_index = vhost_vdpa_get_vq_index,
> +.vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
>  .vhost_get_config  = vhost_vdpa_get_config,
>  .vhost_set_config = vhost_vdpa_set_config,
>  .vhost_requires_shm_log = NULL,
> --
> 2.43.0
>

vhost-vdpa net enables CVQ before dataplane ones to configure all the
device in the destination of a live migration. To go back again to
this callback would cause the device to enable the dataplane before
virtqueues are configured again.

How does VDUSE userspace knows how many queues should enable? Can't
the kernel perform the necessary actions after DRIVER_OK, like
configuring the kick etc?

Thanks!




Re: [PATCH] vdpa-dev: Fix initialisation order to restore VDUSE compatibility

2024-02-05 Thread Stefano Garzarella

On Fri, Feb 02, 2024 at 02:25:21PM +0100, Kevin Wolf wrote:

VDUSE requires that virtqueues are first enabled before the DRIVER_OK
status flag is set; with the current API of the kernel module, it is
impossible to enable the opposite order in our block export code because
userspace is not notified when a virtqueue is enabled.


Yeah, IMHO the VDUSE protocol is missing a VDUSE_SET_VQ_READY message,
I'll start another thread about that, but in the meantime I agree that
we should fix QEMU since we need to work properly with old kernels as
well.



This requirement also matches the normal initialisation order as done by
the generic vhost code in QEMU. However, commit 6c482547 accidentally
changed the order for vdpa-dev and broke access to VDUSE devices with
this.

This changes vdpa-dev to use the normal order again and use the standard
vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
used with vdpa-dev again after this fix.


I like this approach and the patch LGTM, but I'm a bit worried about
this function in hw/net/vhost_net.c:

int vhost_set_vring_enable(NetClientState *nc, int enable)
{
VHostNetState *net = get_vhost_net(nc);
const VhostOps *vhost_ops = net->dev.vhost_ops;

nc->vring_enable = enable;

if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
return vhost_ops->vhost_set_vring_enable(>dev, enable);
}

return 0;
}

@Eugenio, @Jason, should we change some things there if vhost-vdpa
implements the vhost_set_vring_enable callback?

Do you remember why we didn't implement it from the beginning?

Thanks,
Stefano



Cc: qemu-sta...@nongnu.org
Fixes: 6c4825476a4351530bcac17abab72295b75ffe98
Signed-off-by: Kevin Wolf 
---
hw/virtio/vdpa-dev.c   |  5 +
hw/virtio/vhost-vdpa.c | 17 +
2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
index eb9ecea83b..13e87f06f6 100644
--- a/hw/virtio/vdpa-dev.c
+++ b/hw/virtio/vdpa-dev.c
@@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, 
Error **errp)

s->dev.acked_features = vdev->guest_features;

-ret = vhost_dev_start(>dev, vdev, false);
+ret = vhost_dev_start(>dev, vdev, true);
if (ret < 0) {
error_setg_errno(errp, -ret, "Error starting vhost");
goto err_guest_notifiers;
}
-for (i = 0; i < s->dev.nvqs; ++i) {
-vhost_vdpa_set_vring_ready(>vdpa, i);
-}
s->started = true;

/*
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 3a43beb312..c4574d56c5 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, 
unsigned idx)
return r;
}

+static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
+{
+struct vhost_vdpa *v = dev->opaque;
+unsigned int i;
+int ret;
+
+for (i = 0; i < dev->nvqs; ++i) {
+ret = vhost_vdpa_set_vring_ready(v, i);
+if (ret < 0) {
+return ret;
+}
+}
+
+return 0;
+}
+
static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
   int fd)
{
@@ -1524,6 +1540,7 @@ const VhostOps vdpa_ops = {
.vhost_set_features = vhost_vdpa_set_features,
.vhost_reset_device = vhost_vdpa_reset_device,
.vhost_get_vq_index = vhost_vdpa_get_vq_index,
+.vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
.vhost_get_config  = vhost_vdpa_get_config,
.vhost_set_config = vhost_vdpa_set_config,
.vhost_requires_shm_log = NULL,
--
2.43.0






Re: [PATCH] tests/cdrom-test: Add cdrom test for LoongArch virt machine

2024-02-05 Thread maobibo




On 2024/2/5 下午3:47, Thomas Huth wrote:

On 05/02/2024 03.13, Bibo Mao wrote:

The cdrom test skips to execute on LoongArch system with command
"make check"


Are you sure the test is marked with "skip"? ... it should at least test 
with the "none" machine...?
With the latest code, cdrom testcase passes to run. It is strange that 
about two weeks ago, the result displays SKIP, so I post this patch -:)


Regards
Bibo Mao




this patch enables cdrom test for LoongArch virt
machine platform.

With this patch, cdrom test passes to run on LoongArch virt
machine type.

Signed-off-by: Bibo Mao 
---
  tests/qtest/cdrom-test.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c
index 0945383789..c8b97d8d9a 100644
--- a/tests/qtest/cdrom-test.c
+++ b/tests/qtest/cdrom-test.c
@@ -271,6 +271,9 @@ int main(int argc, char **argv)
  const char *virtmachine[] = { "virt", NULL };
  add_cdrom_param_tests(virtmachine);
  }
+    } else if (g_str_equal(arch, "loongarch64")) {
+    const char *virtmachine[] = { "virt", NULL };
+    add_cdrom_param_tests(virtmachine);
  } else {
  const char *nonemachine[] = { "none", NULL };
  add_cdrom_param_tests(nonemachine);


Anyway, using the virt machine is certainly better than the "none" 
machine, so:

Acked-by: Thomas Huth