date:20230509

[PATCH v16 1/4] vhost: expose function vhost_dev_has_iommu()

2023-05-09 Thread Cindy Lu

To support vIOMMU in vdpa, need to exposed the function
vhost_dev_has_iommu, vdpa will use this function to check
if vIOMMU enable.

Signed-off-by: Cindy Lu 
---
 hw/virtio/vhost.c | 2 +-
 include/hw/virtio/vhost.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 746d130c74..23da579ce2 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -107,7 +107,7 @@ static void vhost_dev_sync_region(struct vhost_dev *dev,
 }
 }
 
-static bool vhost_dev_has_iommu(struct vhost_dev *dev)
+bool vhost_dev_has_iommu(struct vhost_dev *dev)
 {
 VirtIODevice *vdev = dev->vdev;
 
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index a52f273347..f7f10c8fb7 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -336,4 +336,5 @@ int vhost_dev_set_inflight(struct vhost_dev *dev,
struct vhost_inflight *inflight);
 int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size,
struct vhost_inflight *inflight);
+bool vhost_dev_has_iommu(struct vhost_dev *dev);
 #endif
-- 
2.34.3

[PATCH v16 4/4] vhost-vdpa: Add support for vIOMMU.

2023-05-09 Thread Cindy Lu

1. The vIOMMU support will make vDPA can work in IOMMU mode. This
will fix security issues while using the no-IOMMU mode.
To support this feature we need to add new functions for IOMMU MR adds and
deletes.

Also since the SVQ does not support vIOMMU yet, add the check for IOMMU
in vhost_vdpa_dev_start, if the SVQ and IOMMU enable at the same time
the function will return fail.

2. Skip the iova_max check vhost_vdpa_listener_skipped_section(). While
MR is IOMMU, move this check to vhost_vdpa_iommu_map_notify()

Verified in vp_vdpa and vdpa_sim_net driver

Signed-off-by: Cindy Lu 
---
 hw/virtio/vhost-vdpa.c | 145 +++--
 include/hw/virtio/vhost-vdpa.h |  11 +++
 2 files changed, 149 insertions(+), 7 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 0c8c37e786..b3094e8a8b 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -26,6 +26,7 @@
 #include "cpu.h"
 #include "trace.h"
 #include "qapi/error.h"
+#include "hw/virtio/virtio-access.h"
 
 /*
  * Return one past the end of the end of section. Be careful with uint64_t
@@ -60,13 +61,21 @@ static bool 
vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
  iova_min, section->offset_within_address_space);
 return true;
 }
+/*
+ * While using vIOMMU, sometimes the section will be larger than iova_max,
+ * but the memory that actually maps is smaller, so move the check to
+ * function vhost_vdpa_iommu_map_notify(). That function will use the 
actual
+ * size that maps to the kernel
+ */
 
-llend = vhost_vdpa_section_end(section);
-if (int128_gt(llend, int128_make64(iova_max))) {
-error_report("RAM section out of device range (max=0x%" PRIx64
- ", end addr=0x%" PRIx64 ")",
- iova_max, int128_get64(llend));
-return true;
+if (!memory_region_is_iommu(section->mr)) {
+llend = vhost_vdpa_section_end(section);
+if (int128_gt(llend, int128_make64(iova_max))) {
+error_report("RAM section out of device range (max=0x%" PRIx64
+ ", end addr=0x%" PRIx64 ")",
+ iova_max, int128_get64(llend));
+return true;
+}
 }
 
 return false;
@@ -185,6 +194,115 @@ static void vhost_vdpa_listener_commit(MemoryListener 
*listener)
 v->iotlb_batch_begin_sent = false;
 }
 
+static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+struct vdpa_iommu *iommu = container_of(n, struct vdpa_iommu, n);
+
+hwaddr iova = iotlb->iova + iommu->iommu_offset;
+struct vhost_vdpa *v = iommu->dev;
+void *vaddr;
+int ret;
+Int128 llend;
+
+if (iotlb->target_as != _space_memory) {
+error_report("Wrong target AS \"%s\", only system memory is allowed",
+ iotlb->target_as->name ? iotlb->target_as->name : "none");
+return;
+}
+RCU_READ_LOCK_GUARD();
+/* check if RAM section out of device range */
+llend = int128_add(int128_makes64(iotlb->addr_mask), int128_makes64(iova));
+if (int128_gt(llend, int128_make64(v->iova_range.last))) {
+error_report("RAM section out of device range (max=0x%" PRIx64
+ ", end addr=0x%" PRIx64 ")",
+ v->iova_range.last, int128_get64(llend));
+return;
+}
+
+if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
+bool read_only;
+
+if (!memory_get_xlat_addr(iotlb, , NULL, _only, NULL)) {
+return;
+}
+ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova,
+ iotlb->addr_mask + 1, vaddr, read_only);
+if (ret) {
+error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", "
+ "0x%" HWADDR_PRIx ", %p) = %d (%m)",
+ v, iova, iotlb->addr_mask + 1, vaddr, ret);
+}
+} else {
+ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
+   iotlb->addr_mask + 1);
+if (ret) {
+error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
+ "0x%" HWADDR_PRIx ") = %d (%m)",
+ v, iova, iotlb->addr_mask + 1, ret);
+}
+}
+}
+
+static void vhost_vdpa_iommu_region_add(MemoryListener *listener,
+MemoryRegionSection *section)
+{
+struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
+
+struct vdpa_iommu *iommu;
+Int128 end;
+int iommu_idx;
+IOMMUMemoryRegion *iommu_mr;
+int ret;
+
+iommu_mr = IOMMU_MEMORY_REGION(section->mr);
+
+iommu = g_malloc0(sizeof(*iommu));
+end = int128_add(int128_make64(section->offset_within_region),
+ section->size);
+end = int128_sub(end, int128_one());
+iommu_idx =

[PATCH v16 3/4] vhost-vdpa: Add check for full 64-bit in region delete

2023-05-09 Thread Cindy Lu

The unmap ioctl doesn't accept a full 64-bit span. So need to
add check for the section's size in vhost_vdpa_listener_region_del().

Signed-off-by: Cindy Lu 
---
 hw/virtio/vhost-vdpa.c | 20 +++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 92c2413c76..0c8c37e786 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -316,10 +316,28 @@ static void vhost_vdpa_listener_region_del(MemoryListener 
*listener,
 vhost_iova_tree_remove(v->iova_tree, *result);
 }
 vhost_vdpa_iotlb_batch_begin_once(v);
+/*
+ * The unmap ioctl doesn't accept a full 64-bit. need to check it
+ */
+if (int128_eq(llsize, int128_2_64())) {
+llsize = int128_rshift(llsize, 1);
+ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
+   int128_get64(llsize));
+
+if (ret) {
+error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
+ "0x%" HWADDR_PRIx ") = %d (%m)",
+ v, iova, int128_get64(llsize), ret);
+}
+iova += int128_get64(llsize);
+}
 ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
int128_get64(llsize));
+
 if (ret) {
-error_report("vhost_vdpa dma unmap error!");
+error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
+ "0x%" HWADDR_PRIx ") = %d (%m)",
+ v, iova, int128_get64(llsize), ret);
 }
 
 memory_region_unref(section->mr);
-- 
2.34.3

[PATCH v16 0/4] vhost-vdpa: add support for vIOMMU

2023-05-09 Thread Cindy Lu

These patches are to support vIOMMU in vdpa device

changes in V3
1. Move function vfio_get_xlat_addr to memory.c
2. Use the existing memory listener, while the MR is
iommu MR then call the function iommu_region_add/
iommu_region_del

changes in V4
1.make the comments in vfio_get_xlat_addr more general

changes in V5
1. Address the comments in the last version
2. Add a new arg in the function vfio_get_xlat_addr, which shows whether
the memory is backed by a discard manager. So the device can have its
own warning.

changes in V6
move the error_report for the unpopulated discard back to
memeory_get_xlat_addr

changes in V7
organize the error massage to avoid the duplicate information

changes in V8
Organize the code follow the comments in the last version

changes in V9
Organize the code follow the comments

changes in V10
Address the comments

changes in V11
Address the comments
fix the crash found in test

changes in V12
Address the comments, squash patch 1 into the next patch
improve the code style issue

changes in V13
fail to start if IOMMU and svq enable at same time
improve the code style issue

changes in V14
Address the comments

changes in V15
Address the comments

changes in V16
remove the batch mode operation in vIOMMU MR

Cindy Lu (4):
  vhost: expose function vhost_dev_has_iommu()
  vhost_vdpa: fix the input in trace_vhost_vdpa_listener_region_del()
  vhost-vdpa: Add check for full 64-bit in region delete
  vhost-vdpa: Add support for vIOMMU.

 hw/virtio/vhost-vdpa.c | 168 +++--
 hw/virtio/vhost.c  |   2 +-
 include/hw/virtio/vhost-vdpa.h |  11 +++
 include/hw/virtio/vhost.h  |   1 +
 4 files changed, 172 insertions(+), 10 deletions(-)

-- 
2.34.3

[PATCH v16 2/4] vhost_vdpa: fix the input in trace_vhost_vdpa_listener_region_del()

2023-05-09 Thread Cindy Lu

In trace_vhost_vdpa_listener_region_del, the value for llend
should change to int128_get64(int128_sub(llend, int128_one()))

Signed-off-by: Cindy Lu 
---
 hw/virtio/vhost-vdpa.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index bc6bad23d5..92c2413c76 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -288,7 +288,8 @@ static void vhost_vdpa_listener_region_del(MemoryListener 
*listener,
 iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
 llend = vhost_vdpa_section_end(section);
 
-trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend));
+trace_vhost_vdpa_listener_region_del(v, iova,
+int128_get64(int128_sub(llend, int128_one(;
 
 if (int128_ge(int128_make64(iova), llend)) {
 return;
-- 
2.34.3

Re: [PULL 00/16] Misc patches for 2023-05-09

2023-05-09 Thread Richard Henderson


On 5/9/23 10:04, Paolo Bonzini wrote:

The following changes since commit 792f77f376adef944f9a03e601f6ad90c2f891b2:

   Merge tag 'pull-loongarch-20230506' ofhttps://gitlab.com/gaosong/qemu  into 
staging (2023-05-06 08:11:52 +0100)

are available in the Git repository at:

   https://gitlab.com/bonzini/qemu.git  tags/for-upstream

for you to fetch changes up to ef709860ea12ec59c4cd7373bd2fd7a4e50143ee:

   meson: leave unnecessary modules out of the build (2023-05-08 19:04:52 +0200)


* target/i386: improved EPYC models
* more removal of mb_read/mb_set
* bump _WIN32_WINNT to the Windows 8 API
* fix for modular builds with --disable-system


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/8.1 as 
appropriate.


r~

Re: [PATCH 1/1] e1000e: Fix tx/rx counters

2023-05-09 Thread Jason Wang




在 2023/4/10 23:33, Akihiko Odaki 写道:

On 2023/04/11 0:27, timothee.coca...@gmail.com wrote:

The bytes and packets counter registers are cleared on read.

Copying the "total counter" registers to the "good counter" registers 
has

side effects.
If the "total" register is never read by the OS, it only gets 
incremented.

This leads to exponential growth of the "good" register.

This commit increments the counters individually to avoid this.

Signed-off-by: Timothée Cocault 


Reviewed-by: Akihiko Odaki 



Queued.

Thanks





---
  hw/net/e1000.c | 5 ++---
  hw/net/e1000e_core.c   | 5 ++---
  hw/net/e1000x_common.c | 5 ++---
  hw/net/igb_core.c  | 5 ++---
  4 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 23d660619f..59bacb5d3b 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -637,9 +637,8 @@ xmit_seg(E1000State *s)
    e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
  e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size + 4);
-    s->mac_reg[GPTC] = s->mac_reg[TPT];
-    s->mac_reg[GOTCL] = s->mac_reg[TOTL];
-    s->mac_reg[GOTCH] = s->mac_reg[TOTH];
+    e1000x_inc_reg_if_not_full(s->mac_reg, GPTC);
+    e1000x_grow_8reg_if_not_full(s->mac_reg, GOTCL, s->tx.size + 4);
  }
    static void
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index c0c09b6965..cfa3f55e96 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -711,9 +711,8 @@ e1000e_on_tx_done_update_stats(E1000ECore *core, 
struct NetTxPkt *tx_pkt)

  g_assert_not_reached();
  }
  -    core->mac[GPTC] = core->mac[TPT];
-    core->mac[GOTCL] = core->mac[TOTL];
-    core->mac[GOTCH] = core->mac[TOTH];
+    e1000x_inc_reg_if_not_full(core->mac, GPTC);
+    e1000x_grow_8reg_if_not_full(core->mac, GOTCL, tot_len);
  }
    static void
diff --git a/hw/net/e1000x_common.c b/hw/net/e1000x_common.c
index b844af590a..4c8e7dcf70 100644
--- a/hw/net/e1000x_common.c
+++ b/hw/net/e1000x_common.c
@@ -220,15 +220,14 @@ e1000x_update_rx_total_stats(uint32_t *mac,
    e1000x_increase_size_stats(mac, PRCregs, data_fcs_size);
  e1000x_inc_reg_if_not_full(mac, TPR);
-    mac[GPRC] = mac[TPR];
+    e1000x_inc_reg_if_not_full(mac, GPRC);
  /* TOR - Total Octets Received:
  * This register includes bytes received in a packet from the 

  * Address> field through the  field, inclusively.
  * Always include FCS length (4) in size.
  */
  e1000x_grow_8reg_if_not_full(mac, TORL, data_size + 4);
-    mac[GORCL] = mac[TORL];
-    mac[GORCH] = mac[TORH];
+    e1000x_grow_8reg_if_not_full(mac, GORCL, data_size + 4);
  }
    void
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index d733fed6cf..826e7a6cf1 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -538,9 +538,8 @@ igb_on_tx_done_update_stats(IGBCore *core, struct 
NetTxPkt *tx_pkt, int qn)

  g_assert_not_reached();
  }
  -    core->mac[GPTC] = core->mac[TPT];
-    core->mac[GOTCL] = core->mac[TOTL];
-    core->mac[GOTCH] = core->mac[TOTH];
+    e1000x_inc_reg_if_not_full(core->mac, GPTC);
+    e1000x_grow_8reg_if_not_full(core->mac, GOTCL, tot_len);
    if (core->mac[MRQC] & 1) {
  uint16_t pool = qn % IGB_NUM_VM_POOLS;

[PATCH] target/riscv: Move zc* out of the experimental properties

2023-05-09 Thread Weiwei Li

Zc* extensions (version 1.0) are ratified.

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
---
 target/riscv/cpu.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index db0875fb43..99ed9cb80e 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1571,6 +1571,14 @@ static Property riscv_cpu_extensions[] = {
 
 DEFINE_PROP_BOOL("zmmul", RISCVCPU, cfg.ext_zmmul, false),
 
+DEFINE_PROP_BOOL("zca", RISCVCPU, cfg.ext_zca, false),
+DEFINE_PROP_BOOL("zcb", RISCVCPU, cfg.ext_zcb, false),
+DEFINE_PROP_BOOL("zcd", RISCVCPU, cfg.ext_zcd, false),
+DEFINE_PROP_BOOL("zce", RISCVCPU, cfg.ext_zce, false),
+DEFINE_PROP_BOOL("zcf", RISCVCPU, cfg.ext_zcf, false),
+DEFINE_PROP_BOOL("zcmp", RISCVCPU, cfg.ext_zcmp, false),
+DEFINE_PROP_BOOL("zcmt", RISCVCPU, cfg.ext_zcmt, false),
+
 /* Vendor-specific custom extensions */
 DEFINE_PROP_BOOL("xtheadba", RISCVCPU, cfg.ext_xtheadba, false),
 DEFINE_PROP_BOOL("xtheadbb", RISCVCPU, cfg.ext_xtheadbb, false),
@@ -1588,14 +1596,6 @@ static Property riscv_cpu_extensions[] = {
 /* These are experimental so mark with 'x-' */
 DEFINE_PROP_BOOL("x-zicond", RISCVCPU, cfg.ext_zicond, false),
 
-DEFINE_PROP_BOOL("x-zca", RISCVCPU, cfg.ext_zca, false),
-DEFINE_PROP_BOOL("x-zcb", RISCVCPU, cfg.ext_zcb, false),
-DEFINE_PROP_BOOL("x-zcd", RISCVCPU, cfg.ext_zcd, false),
-DEFINE_PROP_BOOL("x-zce", RISCVCPU, cfg.ext_zce, false),
-DEFINE_PROP_BOOL("x-zcf", RISCVCPU, cfg.ext_zcf, false),
-DEFINE_PROP_BOOL("x-zcmp", RISCVCPU, cfg.ext_zcmp, false),
-DEFINE_PROP_BOOL("x-zcmt", RISCVCPU, cfg.ext_zcmt, false),
-
 /* ePMP 0.9.3 */
 DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
 DEFINE_PROP_BOOL("x-smaia", RISCVCPU, cfg.ext_smaia, false),
-- 
2.25.1

Re: [PATCH RESEND] vhost: fix possible wrap in SVQ descriptor ring

2023-05-09 Thread Lei Yang

QE applied this patch to do sanity testing on vhost-net, there is no
any regression problem.

Tested-by: Lei Yang 



On Tue, May 9, 2023 at 1:28 AM Eugenio Perez Martin  wrote:
>
> On Sat, May 6, 2023 at 5:01 PM Hawkins Jiawei  wrote:
> >
> > QEMU invokes vhost_svq_add() when adding a guest's element into SVQ.
> > In vhost_svq_add(), it uses vhost_svq_available_slots() to check
> > whether QEMU can add the element into the SVQ. If there is
> > enough space, then QEMU combines some out descriptors and
> > some in descriptors into one descriptor chain, and add it into
> > svq->vring.desc by vhost_svq_vring_write_descs().
> >
> > Yet the problem is that, `svq->shadow_avail_idx - svq->shadow_used_idx`
> > in vhost_svq_available_slots() return the number of occupied elements,
> > or the number of descriptor chains, instead of the number of occupied
> > descriptors, which may cause wrapping in SVQ descriptor ring.
> >
> > Here is an example. In vhost_handle_guest_kick(), QEMU forwards
> > as many available buffers to device by virtqueue_pop() and
> > vhost_svq_add_element(). virtqueue_pop() return a guest's element,
> > and use vhost_svq_add_elemnt(), a wrapper to vhost_svq_add(), to
> > add this element into SVQ. If QEMU invokes virtqueue_pop() and
> > vhost_svq_add_element() `svq->vring.num` times, vhost_svq_available_slots()
> > thinks QEMU just ran out of slots and everything should work fine.
> > But in fact, virtqueue_pop() return `svq-vring.num` elements or
> > descriptor chains, more than `svq->vring.num` descriptors, due to
> > guest memory fragmentation, and this cause wrapping in SVQ descriptor ring.
> >
>
> The bug is valid even before marking the descriptors used. If the
> guest memory is fragmented, SVQ must add chains so it can try to add
> more descriptors than possible.
>
> > Therefore, this patch adds `num_free` field in VhostShadowVirtqueue
> > structure, updates this field in vhost_svq_add() and
> > vhost_svq_get_buf(), to record the number of free descriptors.
> > Then we can avoid wrap in SVQ descriptor ring by refactoring
> > vhost_svq_available_slots().
> >
> > Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding")
> > Signed-off-by: Hawkins Jiawei 
> > ---
> >  hw/virtio/vhost-shadow-virtqueue.c | 9 -
> >  hw/virtio/vhost-shadow-virtqueue.h | 3 +++
> >  2 files changed, 11 insertions(+), 1 deletion(-)
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> > b/hw/virtio/vhost-shadow-virtqueue.c
> > index 8361e70d1b..e1c6952b10 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > @@ -68,7 +68,7 @@ bool vhost_svq_valid_features(uint64_t features, Error 
> > **errp)
> >   */
> >  static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
> >  {
> > -return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
> > +return svq->num_free;
> >  }
> >
> >  /**
> > @@ -263,6 +263,9 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const 
> > struct iovec *out_sg,
> >  return -EINVAL;
> >  }
> >
> > +/* Update the size of SVQ vring free descriptors */
> > +svq->num_free -= ndescs;
> > +
> >  svq->desc_state[qemu_head].elem = elem;
> >  svq->desc_state[qemu_head].ndescs = ndescs;
> >  vhost_svq_kick(svq);
> > @@ -450,6 +453,9 @@ static VirtQueueElement 
> > *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
> >  svq->desc_next[last_used_chain] = svq->free_head;
> >  svq->free_head = used_elem.id;
> >
> > +/* Update the size of SVQ vring free descriptors */
>
> No need for this comment.
>
> Apart from that,
>
> Acked-by: Eugenio Pérez 
>
> > +svq->num_free += num;
> > +
> >  *len = used_elem.len;
> >  return g_steal_pointer(>desc_state[used_elem.id].elem);
> >  }
> > @@ -659,6 +665,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, 
> > VirtIODevice *vdev,
> >  svq->iova_tree = iova_tree;
> >
> >  svq->vring.num = virtio_queue_get_num(vdev, 
> > virtio_get_queue_index(vq));
> > +svq->num_free = svq->vring.num;
> >  driver_size = vhost_svq_driver_area_size(svq);
> >  device_size = vhost_svq_device_area_size(svq);
> >  svq->vring.desc = qemu_memalign(qemu_real_host_page_size(), 
> > driver_size);
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
> > b/hw/virtio/vhost-shadow-virtqueue.h
> > index 926a4897b1..6efe051a70 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.h
> > +++ b/hw/virtio/vhost-shadow-virtqueue.h
> > @@ -107,6 +107,9 @@ typedef struct VhostShadowVirtqueue {
> >
> >  /* Next head to consume from the device */
> >  uint16_t last_used_idx;
> > +
> > +/* Size of SVQ vring free descriptors */
> > +uint16_t num_free;
> >  } VhostShadowVirtqueue;
> >
> >  bool vhost_svq_valid_features(uint64_t features, Error **errp);
> > --
> > 2.25.1
> >
>
>

Re: [PATCH v2] piix: fix regression during unplug in Xen HVM domUs

2023-05-09 Thread Olaf Hering

Resuming this old thread about an unfixed bug, which was introduced in qemu-4.2:

qemu ends up in piix_ide_reset from pci_unplug_disks.
This was not the case prior 4.2, the removed call to
qemu_register_reset(piix3_reset, d) in
ee358e919e385fdc79d59d0d47b4a81e349cd5c9 did apparently nothing.

In my debugging (with v8.0.0) it turned out the three pci_set_word
causes the domU to hang. In fact, it is just the last one:

   pci_set_byte(pci_conf + 0x20, 0x01);  /* BMIBA: 20-23h */

It changes the value from 0xc121 to 0x1.

The question is: what does this do in practice?

Starting with recent qemu (like 7.2), the domU sometimes proceeds with
these messages:

[1.631161] uhci_hcd :00:01.2: host system error, PCI problems?
[1.634965] uhci_hcd :00:01.2: host controller process error, 
something bad happened!
[1.634965] uhci_hcd :00:01.2: host controller halted, very bad!
[1.634965] uhci_hcd :00:01.2: HC died; cleaning up
Loading basic drivers...[2.398048] Disabling IRQ #23

Is anyone familiar enough with PIIX3 and knows how these devices are
interacting?

00:00.0 Host bridge: Intel Corporation 440FX - 82441FX PMC [Natoma] (rev 02)
00:01.0 ISA bridge: Intel Corporation 82371SB PIIX3 ISA [Natoma/Triton II]
00:01.1 IDE interface: Intel Corporation 82371SB PIIX3 IDE [Natoma/Triton II]
00:01.2 USB controller: Intel Corporation 82371SB PIIX3 USB [Natoma/Triton II] 
(rev 01)
00:01.3 Bridge: Intel Corporation 82371AB/EB/MB PIIX4 ACPI (rev 03)
00:02.0 Unassigned class [ff80]: XenSource, Inc. Xen Platform Device (rev 01)
00:03.0 VGA compatible controller: Cirrus Logic GD 5446

Thanks,
Olaf

On Thu, Mar 25, Paolo Bonzini wrote:

> On 25/03/21 12:12, Olaf Hering wrote:
> > Am Mon, 22 Mar 2021 18:09:17 -0400
> > schrieb John Snow :
> > 
> > > My understanding is that XEN has some extra disks that it unplugs when
> > > it later figures out it doesn't need them. How exactly this works is
> > > something I've not looked into too closely.
> > 
> > It has no extra disks, why would it?
> > 
> > I assume each virtualization variant has some sort of unplug if it has to 
> > support guests that lack PV/virtio/enlightened/whatever drivers.
> 
> No, it's Xen only and really should be legacy.  Ideally one would just have
> devices supported at all levels from firmware to kernel.
> 
> > > So if these IDE devices have been "unplugged" already, we avoid
> > > resetting them here. What about this reset causes the bug you describe
> > > in the commit message?
> > > 
> > > Does this reset now happen earlier/later as compared to what it did
> > > prior to ee358e91 ?
> > 
> > Prior this commit, piix_ide_reset was only called when the entire
> > emulated machine was reset. Like: never. With this commit,
> > piix_ide_reset will be called from pci_piix3_xen_ide_unplug. For some
> > reason it confuses the emulated USB hardware. Why it does confused
> > it, no idea.
> 
> > I wonder what the purpose of the qdev_reset_all() call really is. It
> > is 10 years old. It might be stale.
> 
> piix_ide_reset is only calling ide_bus_reset, and from there ide_reset and
> bmdma_reset.  All of these functions do just two things: reset internal
> registers and ensure pending I/O is completed or canceled.  The latter is
> indeed unnecessary; drain/flush/detach is already done before the call to
> qdev_reset_all.
> 
> But the fact that it breaks USB is weird.  That's the part that needs to be
> debugged, because changing IDE to unbreak USB needs an explanation even if
> it's the right thing to do.
> 
> If you don't want to debug it, removing the qdev_reset_all call might do the
> job; you'll have to see what the Xen maintainers think of it.  But if you
> don't debug the USB issue now, it will come back later almost surely.
> 
> Paolo

signature.asc
Description: Digital signature

Re: [SeaBIOS] [PATCH v3 0/6] misc tweaks for kvm and the 64bit pci window

2023-05-09 Thread Kevin O'Connor

On Fri, May 05, 2023 at 09:11:11AM +0200, Gerd Hoffmann wrote:
> v3 changes:
>  - rename variables, use u8 for CPULongMode.
> v2 changes:
>  - e820 conflict fix

Thanks.  Looks fine to me.

-Kevin


> 
> Gerd Hoffmann (6):
>   better kvm detection
>   detect physical address space size
>   move 64bit pci window to end of address space
>   be less conservative with the 64bit pci io window
>   qemu: log reservations in fw_cfg e820 table
>   check for e820 conflict
> 
>  src/e820map.h |  1 +
>  src/fw/paravirt.h |  2 ++
>  src/e820map.c | 15 +
>  src/fw/paravirt.c | 86 ++-
>  src/fw/pciinit.c  | 20 ++-
>  5 files changed, 114 insertions(+), 10 deletions(-)
> 
> -- 
> 2.40.1
> 
> ___
> SeaBIOS mailing list -- seab...@seabios.org
> To unsubscribe send an email to seabios-le...@seabios.org

[PULL 3/3] vfio/pci: Static Resizable BAR capability

2023-05-09 Thread Alex Williamson

The PCI Resizable BAR (ReBAR) capability is currently hidden from the
VM because the protocol for interacting with the capability does not
support a mechanism for the device to reject an advertised supported
BAR size.  However, when assigned to a VM, the act of resizing the
BAR requires adjustment of host resources for the device, which
absolutely can fail.  Linux does not currently allow us to reserve
resources for the device independent of the current usage.

The only writable field within the ReBAR capability is the BAR Size
register.  The PCIe spec indicates that when written, the device
should immediately begin to operate with the provided BAR size.  The
spec however also notes that software must only write values
corresponding to supported sizes as indicated in the capability and
control registers.  Writing unsupported sizes produces undefined
results.  Therefore, if the hypervisor were to virtualize the
capability and control registers such that the current size is the
only indicated available size, then a write of anything other than
the current size falls into the category of undefined behavior,
where we can essentially expose the modified ReBAR capability as
read-only.

This may seem pointless, but users have reported that virtualizing
the capability in this way not only allows guest software to expose
related features as available (even if only cosmetic), but in some
scenarios can resolve guest driver issues.  Additionally, no
regressions in behavior have been reported for this change.

A caveat here is that the PCIe spec requires for compatibility that
devices report support for a size in the range of 1MB to 512GB,
therefore if the current BAR size falls outside that range we revert
to hiding the capability.

Reviewed-by: Cédric Le Goater 
Link: 
https://lore.kernel.org/r/20230505232308.2869912-1-alex.william...@redhat.com
Signed-off-by: Alex Williamson 
---
 hw/vfio/pci.c | 54 ++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index cf27f28936cb..bf27a3990564 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2066,6 +2066,54 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t 
pos, Error **errp)
 return 0;
 }
 
+static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, uint16_t pos)
+{
+uint32_t ctrl;
+int i, nbar;
+
+ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL);
+nbar = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT;
+
+for (i = 0; i < nbar; i++) {
+uint32_t cap;
+int size;
+
+ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL + (i * 
8));
+size = (ctrl & PCI_REBAR_CTRL_BAR_SIZE) >> PCI_REBAR_CTRL_BAR_SHIFT;
+
+/* The cap register reports sizes 1MB to 128TB, with 4 reserved bits */
+cap = size <= 27 ? 1U << (size + 4) : 0;
+
+/*
+ * The PCIe spec (v6.0.1, 7.8.6) requires HW to support at least one
+ * size in the range 1MB to 512GB.  We intend to mask all sizes except
+ * the one currently enabled in the size field, therefore if it's
+ * outside the range, hide the whole capability as this virtualization
+ * trick won't work.  If >512GB resizable BARs start to appear, we
+ * might need an opt-in or reservation scheme in the kernel.
+ */
+if (!(cap & PCI_REBAR_CAP_SIZES)) {
+return -EINVAL;
+}
+
+/* Hide all sizes reported in the ctrl reg per above requirement. */
+ctrl &= (PCI_REBAR_CTRL_BAR_SIZE |
+ PCI_REBAR_CTRL_NBAR_MASK |
+ PCI_REBAR_CTRL_BAR_IDX);
+
+/*
+ * The BAR size field is RW, however we've mangled the capability
+ * register such that we only report a single size, ie. the current
+ * BAR size.  A write of an unsupported value is undefined, therefore
+ * the register field is essentially RO.
+ */
+vfio_add_emulated_long(vdev, pos + PCI_REBAR_CAP + (i * 8), cap, ~0);
+vfio_add_emulated_long(vdev, pos + PCI_REBAR_CTRL + (i * 8), ctrl, ~0);
+}
+
+return 0;
+}
+
 static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
 {
 PCIDevice *pdev = >pdev;
@@ -2139,9 +2187,13 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
 case 0: /* kernel masked capability */
 case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */
 case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */
-case PCI_EXT_CAP_ID_REBAR: /* Can't expose read-only */
 trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next);
 break;
+case PCI_EXT_CAP_ID_REBAR:
+if (!vfio_setup_rebar_ecap(vdev, next)) {
+pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+}
+break;
 default:
 pcie_add_capability(pdev, cap_id, cap_ver, next, size);
 }

[PULL 0/3] VFIO updates 2023-05-09

2023-05-09 Thread Alex Williamson

The following changes since commit 271477b59e723250f17a7e20f139262057921b6a:

  Merge tag 'compression-code-pull-request' of 
https://gitlab.com/juan.quintela/qemu into staging (2023-05-08 20:38:05 +0100)

are available in the Git repository at:

  https://gitlab.com/alex.williamson/qemu.git tags/vfio-updates-20230509.0

for you to fetch changes up to b5048a4cbfa0362abc720b5198fe9a35441bf5fe:

  vfio/pci: Static Resizable BAR capability (2023-05-09 09:30:13 -0600)


VFIO updates 2023-05-09

 * Add vf-token device option allowing QEMU to assign VFs where the PF
   is managed by a userspace driver. (Minwoo Im)

 * Skip log_sync during migration setup as a potential source of failure
   and likely source of redundancy. (Avihai Horon)

 * Virtualize PCIe Resizable BAR capability rather than hiding it,
   exposing only the current size as available. (Alex Williamson)



Alex Williamson (1):
  vfio/pci: Static Resizable BAR capability

Avihai Horon (1):
  vfio/migration: Skip log_sync during migration SETUP state

Minwoo Im (1):
  vfio/pci: add support for VF token

 hw/vfio/common.c |  3 ++-
 hw/vfio/pci.c| 67 ++--
 hw/vfio/pci.h|  1 +
 3 files changed, 68 insertions(+), 3 deletions(-)

-- 
2.39.2

[PULL 1/3] vfio/pci: add support for VF token

2023-05-09 Thread Alex Williamson

From: Minwoo Im 

VF token was introduced [1] to kernel vfio-pci along with SR-IOV
support [2].  This patch adds support VF token among PF and VF(s). To
passthu PCIe VF to a VM, kernel >= v5.7 needs this.

It can be configured with UUID like:

  -device vfio-pci,host=:BB:DD:F,vf-token=,...

[1] 
https://lore.kernel.org/linux-pci/158396393244.5601.10297430724964025753.st...@gimli.home/
[2] 
https://lore.kernel.org/linux-pci/158396044753.5601.14804870681174789709.st...@gimli.home/

Cc: Alex Williamson 
Signed-off-by: Minwoo Im 
Reviewed-by: Klaus Jensen 
Link: 
https://lore.kernel.org/r/20230320073522epcms2p48f682ecdb73e0ae1a4850ad0712fd780@epcms2p4
Signed-off-by: Alex Williamson 
---
 hw/vfio/pci.c | 13 -
 hw/vfio/pci.h |  1 +
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index ec9a854361ac..cf27f28936cb 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2856,6 +2856,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 int groupid;
 int i, ret;
 bool is_mdev;
+char uuid[UUID_FMT_LEN];
+char *name;
 
 if (!vbasedev->sysfsdev) {
 if (!(~vdev->host.domain || ~vdev->host.bus ||
@@ -2936,7 +2938,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 goto error;
 }
 
-ret = vfio_get_device(group, vbasedev->name, vbasedev, errp);
+if (!qemu_uuid_is_null(>vf_token)) {
+qemu_uuid_unparse(>vf_token, uuid);
+name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid);
+} else {
+name = vbasedev->name;
+}
+
+ret = vfio_get_device(group, name, vbasedev, errp);
+g_free(name);
 if (ret) {
 vfio_put_group(group);
 goto error;
@@ -3268,6 +3278,7 @@ static void vfio_instance_init(Object *obj)
 
 static Property vfio_pci_dev_properties[] = {
 DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host),
+DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token),
 DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev),
 DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice,
 vbasedev.pre_copy_dirty_page_tracking,
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 177abcc8fb67..2674476d6c77 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -137,6 +137,7 @@ struct VFIOPCIDevice {
 VFIOVGA *vga; /* 0xa, 0x3b0, 0x3c0 */
 void *igd_opregion;
 PCIHostDeviceAddress host;
+QemuUUID vf_token;
 EventNotifier err_notifier;
 EventNotifier req_notifier;
 int (*resetfn)(struct VFIOPCIDevice *);
-- 
2.39.2

[PULL 2/3] vfio/migration: Skip log_sync during migration SETUP state

2023-05-09 Thread Alex Williamson

From: Avihai Horon 

Currently, VFIO log_sync can be issued while migration is in SETUP
state. However, doing this log_sync is at best redundant and at worst
can fail.

Redundant -- all RAM is marked dirty in migration SETUP state and is
transferred only after migration is set to ACTIVE state, so doing
log_sync during migration SETUP is pointless.

Can fail -- there is a time window, between setting migration state to
SETUP and starting dirty tracking by RAM save_live_setup handler, during
which dirty tracking is still not started. Any VFIO log_sync call that
is issued during this time window will fail. For example, this error can
be triggered by migrating a VM when a GUI is active, which constantly
calls log_sync.

Fix it by skipping VFIO log_sync while migration is in SETUP state.

Fixes: 758b96b61d5c ("vfio/migrate: Move switch of dirty tracking into 
vfio_memory_listener")
Signed-off-by: Avihai Horon 
Link: https://lore.kernel.org/r/2023040313.6422-1-avih...@nvidia.com
Signed-off-by: Alex Williamson 
---
 hw/vfio/common.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 4d01ea351515..78358ede2764 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -478,7 +478,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer 
*container)
 VFIODevice *vbasedev;
 MigrationState *ms = migrate_get_current();
 
-if (!migration_is_setup_or_active(ms->state)) {
+if (ms->state != MIGRATION_STATUS_ACTIVE &&
+ms->state != MIGRATION_STATUS_DEVICE) {
 return false;
 }
 
-- 
2.39.2

Re: [PATCH 11/11] cutils: Improve qemu_strtosz handling of fractions

2023-05-09 Thread Eric Blake

On Tue, May 09, 2023 at 07:54:30PM +0200, Hanna Czenczek wrote:
> On 08.05.23 22:03, Eric Blake wrote:
> > We have several limitations and bugs worth fixing; they are
> > inter-related enough that it is not worth splitting this patch into
> > smaller pieces:
> > 
> > * ".5k" should work to specify 512, just as "0.5k" does
> > * "1.k" and "1." + "9"*50 + "k" should both produce the same
> >result of 2048 after rounding
> > * "1." + "0"*350 + "1B" should not be treated the same as "1.0B";
> >underflow in the fraction should not be lost
> > * "7.99e99" and "7.99e999" look similar, but our code was doing a
> >read-out-of-bounds on the latter because it was not expecting ERANGE
> >due to overflow. While we document that scientific notation is not
> >supported, and the previous patch actually fixed
> >qemu_strtod_finite() to no longer return ERANGE overflows, it is
> >easier to pre-filter than to try and determine after the fact if
> >strtod() consumed more than we wanted.  Note that this is a
> >low-level semantic change (when endptr is not NULL, we can now
> >successfully parse with a scale of 'E' and then report trailing
> >junk, instead of failing outright with EINVAL); but an earlier
> >commit already argued that this is not a high-level semantic change
> >since the only caller passing in a non-NULL endptr also checks that
> >the tail is whitespace-only.
> > 
> > Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1629
> > Signed-off-by: Eric Blake 
> > ---
> >   tests/unit/test-cutils.c | 51 +++
> >   util/cutils.c| 89 
> >   2 files changed, 88 insertions(+), 52 deletions(-)
> 
> [...]
> 
> > diff --git a/util/cutils.c b/util/cutils.c
> > index 0e056a27a44..d1dfbc69d16 100644
> > --- a/util/cutils.c
> > +++ b/util/cutils.c
> 
> [...]
> 
> > @@ -246,27 +244,66 @@ static int do_strtosz(const char *nptr, const char 
> > **end,
> >   retval = -EINVAL;
> >   goto out;
> >   }
> > -} else if (*endptr == '.') {
> > +} else if (*endptr == '.' || (endptr == nptr && strchr(nptr, '.'))) {
> 
> What case is there where we have a fraction but *endptr != '.'?

Bigger context:

result = qemu_strtou64(nptr, , 10, );
// at this point, result is one of:
//  a. 0 - we parsed a decimal string, endptr points to any slop
//  b. -EINVAL - we could not recognize a decimal string: multiple reasons
//  b.1. nptr was NULL (endptr is NULL)
//  b.2. nptr was "" or otherwise whitespace only (endptr is nptr)
//  b.3. the first non-whitespace in nptr was not a sign or digit (endptr is 
nptr)
//  c. -ERANGE - we saw a decimal string, but it didn't fit in uint64 (endptr is
//past first digit)
if (retval == -ERANGE || !nptr) {
// filter out c. and b.1
goto out;
}
if (retval == 0 && val == 0 && (*endptr == 'x' || *endptr == 'X')) {
// a, where we must decipher between "0x", "00x", "0xjunk", "0x1", ...
// not changed by this patch, and where we give -EINVAL if we see any 
trailing
// slop like "0x1." or "0x1p"
} else  if (*endptr == '.' || (endptr == nptr && strchr(nptr, '.'))) {
// The left half is possible in both a. (such as "1.5k")
// and b.3. when '.' was the first slop byte (such as ".5k")
// The right half is possible only for b.3 when '.' was not the first slop
// (needed for covering " +.5k")
// At this point, b.2. has been filtered out

...

> 
> >   /*
> >* Input looks like a fraction.  Make sure even 1.k works
> > - * without fractional digits.  If we see an exponent, treat
> > - * the entire input as invalid instead.
> > + * without fractional digits.  strtod tries to treat 'e' as an
> > + * exponent, but we want to treat it as a scaling suffix;
> > + * doing this requires modifying a copy of the fraction.
> >*/
> > -double fraction;
> > +double fraction = 0.0;
> > 
> > -f = endptr;
> > -retval = qemu_strtod_finite(f, , );
> > -if (retval) {
> > +if (retval == 0 && *endptr == '.' && !isdigit(endptr[1])) {
> > +/* If we got here, we parsed at least one digit already. */
> >   endptr++;

The 'retval == 0' check could equally be written 'endptr > nptr' (the
two are synonymous based on the conditions of a.; we cannot get here
under b.3); the '*endptr == '.' is necessary so that if nptr=="1junk",
we use 'j' as the scaling suffix rather than trying to skip past a
non-present '.'; and the '!isdigit(endptr[1])' is necessary so that
"1.k" does not result in us trying to call strtod(".k") which would
fail for an unexpected EINVAL.  Basically, this branch handles all
cases where we've seen at least one digit and the only thing between
digits and a possible scaling suffix is a single '.', so strtod is not
worth using.

> > -} else if (memchr(f, 'e', endptr - f) || memchr(f,

RE: [PATCH] Remove test_vshuff from hvx_misc tests

2023-05-09 Thread Taylor Simpson




> -Original Message-
> From: Brian Cain 
> Sent: Tuesday, May 9, 2023 3:01 PM
> To: Taylor Simpson ; Marco Liebel (QUIC)
> ; qemu-devel@nongnu.org
> Cc: Matheus Bernardino (QUIC) 
> Subject: RE: [PATCH] Remove test_vshuff from hvx_misc tests
> 
> 
> 
> > -Original Message-
> > From: Taylor Simpson 
> > Sent: Tuesday, May 9, 2023 2:28 PM
> > To: Marco Liebel (QUIC) ; qemu-
> > de...@nongnu.org
> > Cc: Brian Cain ; Matheus Bernardino (QUIC)
> > 
> > Subject: RE: [PATCH] Remove test_vshuff from hvx_misc tests
> >
> >
> >
> > > -Original Message-
> > > From: Marco Liebel (QUIC) 
> > > Sent: Tuesday, May 9, 2023 1:43 PM
> > > To: qemu-devel@nongnu.org
> > > Cc: Taylor Simpson ; Brian Cain
> > > ; Matheus Bernardino (QUIC)
> > > ; Marco Liebel (QUIC)
> > > 
> > > Subject: [PATCH] Remove test_vshuff from hvx_misc tests
> > >
> > > test_vshuff checks that the vshuff instruction works correctly when
> > > both vector registers are the same. Using vshuff in this way is
> > > undefined and will be rejected by the compiler in a future version of the
> toolchain.
> > >
> > > Signed-off-by: Marco Liebel 
> > > ---
> > >  tests/tcg/hexagon/hvx_misc.c | 45
> > > 
> > >  1 file changed, 45 deletions(-)
> >
> > Let's not remove the test completely.  Just change it to use different
> registers.
> 
> I'm fine either way.  But IIRC we added this test particularly in order to 
> verify
> the potentially ambiguous behavior of the same operand here.  It may be
> well tested otherwise.

I confirmed the hvx_histogram test executes this instruction, so
Reviewed-by: Taylor Simpson

Re: [PATCH v5 05/21] virtio-scsi: stop using aio_disable_external() during unplug

2023-05-09 Thread Stefan Hajnoczi

On Tue, May 09, 2023 at 08:55:14PM +0200, Kevin Wolf wrote:
> Am 04.05.2023 um 21:53 hat Stefan Hajnoczi geschrieben:
> > This patch is part of an effort to remove the aio_disable_external()
> > API because it does not fit in a multi-queue block layer world where
> > many AioContexts may be submitting requests to the same disk.
> > 
> > The SCSI emulation code is already in good shape to stop using
> > aio_disable_external(). It was only used by commit 9c5aad84da1c
> > ("virtio-scsi: fixed virtio_scsi_ctx_check failed when detaching scsi
> > disk") to ensure that virtio_scsi_hotunplug() works while the guest
> > driver is submitting I/O.
> > 
> > Ensure virtio_scsi_hotunplug() is safe as follows:
> > 
> > 1. qdev_simple_device_unplug_cb() -> qdev_unrealize() ->
> >device_set_realized() calls qatomic_set(>realized, false) so
> >that future scsi_device_get() calls return NULL because they exclude
> >SCSIDevices with realized=false.
> > 
> >That means virtio-scsi will reject new I/O requests to this
> >SCSIDevice with VIRTIO_SCSI_S_BAD_TARGET even while
> >virtio_scsi_hotunplug() is still executing. We are protected against
> >new requests!
> > 
> > 2. scsi_device_unrealize() already contains a call to
> 
> I think you mean scsi_qdev_unrealize(). Can be fixed while applying.

Yes, it should be scsi_qdev_unrealize(). I'll review your other comments
and fix this if I need to respin.

Stefan


signature.asc
Description: PGP signature

Re: ssl fips self check fails with 7.2.0 on x86 TCG

2023-05-09 Thread Patrick Venture

On Tue, May 9, 2023 at 9:51 AM Michael Tokarev  wrote:

> 09.05.2023 17:39, Philippe Mathieu-Daudé пишет:
> ..> Should be fixed in v7.2-stable:
> >
> > $ git log --oneline --grep=1d0b9261 v7.2.2
> > c45d10f655 target/i386: fix ADOX followed by ADCX
> > 6809dbc5c5 target/i386: Fix C flag for BLSI, BLSMSK, BLSR
> > 8d3c9fc439 target/i386: Fix BEXTR instruction
>
> Unfortunately it is still not released, -
> I haven't heard anything from Michael Roth since Apr-22 (when 7.2.2
> planned).
>

Thanks, I have it cherry-picked into our repo. :)


>
> /mjt
>

RE: [PATCH] Remove test_vshuff from hvx_misc tests

2023-05-09 Thread Brian Cain




> -Original Message-
> From: Taylor Simpson 
> Sent: Tuesday, May 9, 2023 2:28 PM
> To: Marco Liebel (QUIC) ; qemu-
> de...@nongnu.org
> Cc: Brian Cain ; Matheus Bernardino (QUIC)
> 
> Subject: RE: [PATCH] Remove test_vshuff from hvx_misc tests
> 
> 
> 
> > -Original Message-
> > From: Marco Liebel (QUIC) 
> > Sent: Tuesday, May 9, 2023 1:43 PM
> > To: qemu-devel@nongnu.org
> > Cc: Taylor Simpson ; Brian Cain
> > ; Matheus Bernardino (QUIC)
> > ; Marco Liebel (QUIC)
> > 
> > Subject: [PATCH] Remove test_vshuff from hvx_misc tests
> >
> > test_vshuff checks that the vshuff instruction works correctly when both
> > vector registers are the same. Using vshuff in this way is undefined and 
> > will
> > be rejected by the compiler in a future version of the toolchain.
> >
> > Signed-off-by: Marco Liebel 
> > ---
> >  tests/tcg/hexagon/hvx_misc.c | 45 
> >  1 file changed, 45 deletions(-)
> 
> Let's not remove the test completely.  Just change it to use different 
> registers.

I'm fine either way.  But IIRC we added this test particularly in order to 
verify the potentially ambiguous behavior of the same operand here.  It may be 
well tested otherwise.

RE: [PATCH] Remove test_vshuff from hvx_misc tests

2023-05-09 Thread Brian Cain




> -Original Message-
> From: Marco Liebel (QUIC) 
> Sent: Tuesday, May 9, 2023 1:43 PM
> To: qemu-devel@nongnu.org
> Cc: Taylor Simpson ; Brian Cain ;
> Matheus Bernardino (QUIC) ; Marco Liebel
> (QUIC) 
> Subject: [PATCH] Remove test_vshuff from hvx_misc tests
> 
> test_vshuff checks that the vshuff instruction works correctly when
> both vector registers are the same. Using vshuff in this way is
> undefined and will be rejected by the compiler in a future version of
> the toolchain.
> 
> Signed-off-by: Marco Liebel 
> ---
>  tests/tcg/hexagon/hvx_misc.c | 45 
>  1 file changed, 45 deletions(-)
> 
> diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c
> index d0e64e035f..bc4e76d7f0 100644
> --- a/tests/tcg/hexagon/hvx_misc.c
> +++ b/tests/tcg/hexagon/hvx_misc.c
> @@ -342,49 +342,6 @@ static void test_vsubuwsat_dv(void)
>  check_output_w(__LINE__, 2);
>  }
> 
> -static void test_vshuff(void)
> -{
> -/* Test that vshuff works when the two operands are the same register */
> -const uint32_t splat = 0x089be55c;
> -const uint32_t shuff = 0x454fa926;
> -MMVector v0, v1;
> -
> -memset(expect, 0x12, sizeof(MMVector));
> -memset(output, 0x34, sizeof(MMVector));
> -
> -asm volatile("v25 = vsplat(%0)\n\t"
> - "vshuff(v25, v25, %1)\n\t"
> - "vmem(%2 + #0) = v25\n\t"
> - : /* no outputs */
> - : "r"(splat), "r"(shuff), "r"(output)
> - : "v25", "memory");
> -
> -/*
> - * The semantics of Hexagon are the operands are pass-by-value, so create
> - * two copies of the vsplat result.
> - */
> -for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) {
> -v0.uw[i] = splat;
> -v1.uw[i] = splat;
> -}
> -/* Do the vshuff operation */
> -for (int offset = 1; offset < MAX_VEC_SIZE_BYTES; offset <<= 1) {
> -if (shuff & offset) {
> -for (int k = 0; k < MAX_VEC_SIZE_BYTES; k++) {
> -if (!(k & offset)) {
> -uint8_t tmp = v0.ub[k];
> -v0.ub[k] = v1.ub[k + offset];
> -v1.ub[k + offset] = tmp;
> -}
> -}
> -}
> -}
> -/* Put the result in the expect buffer for verification */
> -expect[0] = v1;
> -
> -check_output_b(__LINE__, 1);
> -}
> -
>  static void test_load_tmp_predicated(void)
>  {
>  void *p0 = buffer0;
> @@ -489,8 +446,6 @@ int main()
>  test_vadduwsat();
>  test_vsubuwsat_dv();
> 
> -test_vshuff();
> -
>  test_load_tmp_predicated();
>  test_load_cur_predicated();
> 
> --
> 2.25.1

Reviewed-by: Brian Cain

RE: [PATCH] Remove test_vshuff from hvx_misc tests

2023-05-09 Thread Taylor Simpson




> -Original Message-
> From: Marco Liebel (QUIC) 
> Sent: Tuesday, May 9, 2023 1:43 PM
> To: qemu-devel@nongnu.org
> Cc: Taylor Simpson ; Brian Cain
> ; Matheus Bernardino (QUIC)
> ; Marco Liebel (QUIC)
> 
> Subject: [PATCH] Remove test_vshuff from hvx_misc tests
> 
> test_vshuff checks that the vshuff instruction works correctly when both
> vector registers are the same. Using vshuff in this way is undefined and will
> be rejected by the compiler in a future version of the toolchain.
> 
> Signed-off-by: Marco Liebel 
> ---
>  tests/tcg/hexagon/hvx_misc.c | 45 
>  1 file changed, 45 deletions(-)

Let's not remove the test completely.  Just change it to use different 
registers.

Thanks,
Taylor

Re: [PATCH 0/2] gitlab: improve artifact handling

2023-05-09 Thread Juan Quintela

Daniel P. Berrangé  wrote:
> We are missing test log artifacts from various check jobs on failure,
> and also missing test logs from the coverage job
>
> Daniel P. Berrangé (2):
>   gitlab: explicit set artifacts publishing criteria
>   gitlab: ensure coverage job also publishes meson log
>
>  .gitlab-ci.d/buildtest-template.yml  | 4 +++-
>  .gitlab-ci.d/buildtest.yml   | 5 +
>  .gitlab-ci.d/crossbuild-template.yml | 1 +
>  .gitlab-ci.d/crossbuilds.yml | 2 ++
>  .gitlab-ci.d/custom-runners.yml  | 1 +
>  .gitlab-ci.d/opensbi.yml | 1 +
>  6 files changed, 13 insertions(+), 1 deletion(-)

Daniel

I love the idea of this series.

But I have no idea about gitlab, yml or gitlab, so my review is
useless.

But thanks for doing it.

Later, Juan.

[PULL 06/10] build: move COLO under CONFIG_REPLICATION

2023-05-09 Thread Juan Quintela

From: Vladimir Sementsov-Ogievskiy 

We don't allow to use x-colo capability when replication is not
configured. So, no reason to build COLO when replication is disabled,
it's unusable in this case.

Note also that the check in migrate_caps_check() is not the only
restriction: some functions in migration/colo.c will just abort if
called with not defined CONFIG_REPLICATION, for example:

migration_iteration_finish()
   case MIGRATION_STATUS_COLO:
   migrate_start_colo_process()
   colo_process_checkpoint()
   abort()

It could probably make sense to have possibility to enable COLO without
REPLICATION, but this requires deeper audit of colo & replication code,
which may be done later if needed.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Acked-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Message-Id: <20230428194928.1426370-4-vsement...@yandex-team.ru>
Signed-off-by: Juan Quintela 
---
 hmp-commands.hx|  2 ++
 migration/colo.c   | 28 
 migration/meson.build  |  6 --
 migration/migration-hmp-cmds.c |  2 ++
 migration/migration.c  |  6 ++
 qapi/migration.json|  9 +---
 stubs/colo.c   | 39 ++
 stubs/meson.build  |  1 +
 8 files changed, 60 insertions(+), 33 deletions(-)
 create mode 100644 stubs/colo.c

diff --git a/hmp-commands.hx b/hmp-commands.hx
index 9afbb54a51..2cbd0f77a0 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1052,6 +1052,7 @@ SRST
   migration (or once already in postcopy).
 ERST
 
+#ifdef CONFIG_REPLICATION
 {
 .name   = "x_colo_lost_heartbeat",
 .args_type  = "",
@@ -1060,6 +1061,7 @@ ERST
   "a failover or takeover is needed.",
 .cmd = hmp_x_colo_lost_heartbeat,
 },
+#endif
 
 SRST
 ``x_colo_lost_heartbeat``
diff --git a/migration/colo.c b/migration/colo.c
index c9e0b909b9..6c7c313956 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -26,9 +26,7 @@
 #include "qemu/rcu.h"
 #include "migration/failover.h"
 #include "migration/ram.h"
-#ifdef CONFIG_REPLICATION
 #include "block/replication.h"
-#endif
 #include "net/colo-compare.h"
 #include "net/colo.h"
 #include "block/block.h"
@@ -86,7 +84,6 @@ void colo_checkpoint_delay_set(void)
 static void secondary_vm_do_failover(void)
 {
 /* COLO needs enable block-replication */
-#ifdef CONFIG_REPLICATION
 int old_state;
 MigrationIncomingState *mis = migration_incoming_get_current();
 Error *local_err = NULL;
@@ -151,14 +148,10 @@ static void secondary_vm_do_failover(void)
 if (mis->migration_incoming_co) {
 qemu_coroutine_enter(mis->migration_incoming_co);
 }
-#else
-abort();
-#endif
 }
 
 static void primary_vm_do_failover(void)
 {
-#ifdef CONFIG_REPLICATION
 MigrationState *s = migrate_get_current();
 int old_state;
 Error *local_err = NULL;
@@ -199,9 +192,6 @@ static void primary_vm_do_failover(void)
 
 /* Notify COLO thread that failover work is finished */
 qemu_sem_post(>colo_exit_sem);
-#else
-abort();
-#endif
 }
 
 COLOMode get_colo_mode(void)
@@ -235,7 +225,6 @@ void colo_do_failover(void)
 }
 }
 
-#ifdef CONFIG_REPLICATION
 void qmp_xen_set_replication(bool enable, bool primary,
  bool has_failover, bool failover,
  Error **errp)
@@ -289,7 +278,6 @@ void qmp_xen_colo_do_checkpoint(Error **errp)
 /* Notify all filters of all NIC to do checkpoint */
 colo_notify_filters_event(COLO_EVENT_CHECKPOINT, errp);
 }
-#endif
 
 COLOStatus *qmp_query_colo_status(Error **errp)
 {
@@ -453,15 +441,11 @@ static int colo_do_checkpoint_transaction(MigrationState 
*s,
 }
 qemu_mutex_lock_iothread();
 
-#ifdef CONFIG_REPLICATION
 replication_do_checkpoint_all(_err);
 if (local_err) {
 qemu_mutex_unlock_iothread();
 goto out;
 }
-#else
-abort();
-#endif
 
 colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, _err);
 if (local_err) {
@@ -579,15 +563,11 @@ static void colo_process_checkpoint(MigrationState *s)
 object_unref(OBJECT(bioc));
 
 qemu_mutex_lock_iothread();
-#ifdef CONFIG_REPLICATION
 replication_start_all(REPLICATION_MODE_PRIMARY, _err);
 if (local_err) {
 qemu_mutex_unlock_iothread();
 goto out;
 }
-#else
-abort();
-#endif
 
 vm_start();
 qemu_mutex_unlock_iothread();
@@ -755,7 +735,6 @@ static void 
colo_incoming_process_checkpoint(MigrationIncomingState *mis,
 return;
 }
 
-#ifdef CONFIG_REPLICATION
 replication_get_error_all(_err);
 if (local_err) {
 error_propagate(errp, local_err);
@@ -772,9 +751,6 @@ static void 
colo_incoming_process_checkpoint(MigrationIncomingState *mis,
 qemu_mutex_unlock_iothread();
 return;
 }
-#else
-abort();
-#endif
 /* Notify all filters of all NIC to do

[PULL 09/10] migration: disallow change capabilities in COLO state

2023-05-09 Thread Juan Quintela

From: Vladimir Sementsov-Ogievskiy 

COLO is not listed as running state in migrate_is_running(), so, it's
theoretically possible to disable colo capability in COLO state and the
unexpected error in migration_iteration_finish() is reachable.

Let's disallow that in qmp_migrate_set_capabilities. Than the error
becomes absolutely unreachable: we can get into COLO state only with
enabled capability and can't disable it while we are in COLO state. So
substitute the error by simple assertion.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
Reviewed-by: Peter Xu 
Message-Id: <20230428194928.1426370-10-vsement...@yandex-team.ru>
Signed-off-by: Juan Quintela 
---
 migration/migration.c | 5 +
 migration/options.c   | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 230f91f5a7..4959f7ee44 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2781,10 +2781,7 @@ static void migration_iteration_finish(MigrationState *s)
 runstate_set(RUN_STATE_POSTMIGRATE);
 break;
 case MIGRATION_STATUS_COLO:
-if (!migrate_colo()) {
-error_report("%s: critical error: calling COLO code without "
- "COLO enabled", __func__);
-}
+assert(migrate_colo());
 migrate_start_colo_process(s);
 s->vm_was_running = true;
 /* Fallthrough */
diff --git a/migration/options.c b/migration/options.c
index 9d92b15b76..7ed88b7b32 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -598,7 +598,7 @@ void 
qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 MigrationCapabilityStatusList *cap;
 bool new_caps[MIGRATION_CAPABILITY__MAX];
 
-if (migration_is_running(s->state)) {
+if (migration_is_running(s->state) || migration_in_colo_state()) {
 error_setg(errp, QERR_MIGRATION_ACTIVE);
 return;
 }
-- 
2.40.0

[PULL 03/10] multifd: Add the ramblock to MultiFDRecvParams

2023-05-09 Thread Juan Quintela

From: Lukas Straub 

This will be used in the next commits to add colo support to multifd.

Signed-off-by: Lukas Straub 
Reviewed-by: Juan Quintela 
Message-Id: 
<88135197411df1a71d7832962b39abf60faf0021.1683572883.git.lukasstra...@web.de>
Signed-off-by: Juan Quintela 
---
 migration/multifd.c | 11 +--
 migration/multifd.h |  2 ++
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index 4e71c19292..5c4298eadf 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -281,7 +281,6 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
 static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
 {
 MultiFDPacket_t *packet = p->packet;
-RAMBlock *block;
 int i;
 
 packet->magic = be32_to_cpu(packet->magic);
@@ -331,21 +330,21 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams 
*p, Error **errp)
 
 /* make sure that ramblock is 0 terminated */
 packet->ramblock[255] = 0;
-block = qemu_ram_block_by_name(packet->ramblock);
-if (!block) {
+p->block = qemu_ram_block_by_name(packet->ramblock);
+if (!p->block) {
 error_setg(errp, "multifd: unknown ram block %s",
packet->ramblock);
 return -1;
 }
 
-p->host = block->host;
+p->host = p->block->host;
 for (i = 0; i < p->normal_num; i++) {
 uint64_t offset = be64_to_cpu(packet->offset[i]);
 
-if (offset > (block->used_length - p->page_size)) {
+if (offset > (p->block->used_length - p->page_size)) {
 error_setg(errp, "multifd: offset too long %" PRIu64
" (max " RAM_ADDR_FMT ")",
-   offset, block->used_length);
+   offset, p->block->used_length);
 return -1;
 }
 p->normal[i] = offset;
diff --git a/migration/multifd.h b/migration/multifd.h
index 7cfc265148..a835643b48 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -175,6 +175,8 @@ typedef struct {
 uint32_t next_packet_size;
 /* packets sent through this channel */
 uint64_t num_packets;
+/* ramblock */
+RAMBlock *block;
 /* ramblock host address */
 uint8_t *host;
 /* non zero pages recv through this channel */
-- 
2.40.0

[PULL 07/10] migration: drop colo_incoming_thread from MigrationIncomingState

2023-05-09 Thread Juan Quintela

From: Vladimir Sementsov-Ogievskiy 

have_colo_incoming_thread variable is unused. colo_incoming_thread can
be local.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
Reviewed-by: Peter Xu 
Reviewed-by: Zhang Chen 
Message-Id: <20230428194928.1426370-6-vsement...@yandex-team.ru>
Signed-off-by: Juan Quintela 
---
 migration/migration.c | 7 ---
 migration/migration.h | 2 --
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 01ee92e699..3ab3b1c3e6 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -544,6 +544,8 @@ process_incoming_migration_co(void *opaque)
 
 /* we get COLO info, and know if we are in COLO mode */
 if (!ret && migration_incoming_colo_enabled()) {
+QemuThread colo_incoming_thread;
+
 /* Make sure all file formats throw away their mutable metadata */
 bdrv_activate_all(_err);
 if (local_err) {
@@ -551,14 +553,13 @@ process_incoming_migration_co(void *opaque)
 goto fail;
 }
 
-qemu_thread_create(>colo_incoming_thread, "COLO incoming",
+qemu_thread_create(_incoming_thread, "COLO incoming",
  colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
-mis->have_colo_incoming_thread = true;
 qemu_coroutine_yield();
 
 qemu_mutex_unlock_iothread();
 /* Wait checkpoint incoming thread exit before free resource */
-qemu_thread_join(>colo_incoming_thread);
+qemu_thread_join(_incoming_thread);
 qemu_mutex_lock_iothread();
 /* We hold the global iothread lock, so it is safe here */
 colo_release_ram_cache();
diff --git a/migration/migration.h b/migration/migration.h
index 3a918514e7..7721c7658b 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -162,8 +162,6 @@ struct MigrationIncomingState {
 
 int state;
 
-bool have_colo_incoming_thread;
-QemuThread colo_incoming_thread;
 /* The coroutine we should enter (back) after failover */
 Coroutine *migration_incoming_co;
 QemuSemaphore colo_incoming_sem;
-- 
2.40.0

[PULL 10/10] migration: block incoming colo when capability is disabled

2023-05-09 Thread Juan Quintela

From: Vladimir Sementsov-Ogievskiy 

We generally require same set of capabilities on source and target.
Let's require x-colo capability to use COLO on target.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
Reviewed-by: Peter Xu 
Reviewed-by: Lukas Straub 
Reviewed-by: Zhang Chen 
Message-Id: <20230428194928.1426370-11-vsement...@yandex-team.ru>
Signed-off-by: Juan Quintela 
---
 docs/COLO-FT.txt  | 1 +
 migration/migration.c | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt
index 8ec653f81c..2e760a4aee 100644
--- a/docs/COLO-FT.txt
+++ b/docs/COLO-FT.txt
@@ -210,6 +210,7 @@ children.0=childs0 \
 
 3. On Secondary VM's QEMU monitor, issue command
 {"execute":"qmp_capabilities"}
+{"execute": "migrate-set-capabilities", "arguments": {"capabilities": [ 
{"capability": "x-colo", "state": true } ] } }
 {"execute": "nbd-server-start", "arguments": {"addr": {"type": "inet", "data": 
{"host": "0.0.0.0", "port": ""} } } }
 {"execute": "nbd-server-add", "arguments": {"device": "parent0", "writable": 
true } }
 
diff --git a/migration/migration.c b/migration/migration.c
index 4959f7ee44..7558c8edbd 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -398,6 +398,12 @@ int migration_incoming_enable_colo(void)
 return -ENOTSUP;
 #endif
 
+if (!migrate_colo()) {
+error_report("ENABLE_COLO command come in migration stream, but c-colo 
"
+ "capability is not set");
+return -EINVAL;
+}
+
 if (ram_block_discard_disable(true)) {
 error_report("COLO: cannot disable RAM discard");
 return -EBUSY;
-- 
2.40.0

[PULL 08/10] migration: process_incoming_migration_co: simplify code flow around ret

2023-05-09 Thread Juan Quintela

From: Vladimir Sementsov-Ogievskiy 

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
Reviewed-by: Peter Xu 
Reviewed-by: Zhang Chen 
Message-Id: <20230428194928.1426370-7-vsement...@yandex-team.ru>
Signed-off-by: Juan Quintela 
---
 migration/migration.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 3ab3b1c3e6..230f91f5a7 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -542,8 +542,13 @@ process_incoming_migration_co(void *opaque)
 /* Else if something went wrong then just fall out of the normal exit 
*/
 }
 
+if (ret < 0) {
+error_report("load of migration failed: %s", strerror(-ret));
+goto fail;
+}
+
 /* we get COLO info, and know if we are in COLO mode */
-if (!ret && migration_incoming_colo_enabled()) {
+if (migration_incoming_colo_enabled()) {
 QemuThread colo_incoming_thread;
 
 /* Make sure all file formats throw away their mutable metadata */
@@ -565,10 +570,6 @@ process_incoming_migration_co(void *opaque)
 colo_release_ram_cache();
 }
 
-if (ret < 0) {
-error_report("load of migration failed: %s", strerror(-ret));
-goto fail;
-}
 mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
 qemu_bh_schedule(mis->bh);
 mis->migration_incoming_co = NULL;
-- 
2.40.0

[PULL 02/10] ram: Let colo_flush_ram_cache take the bitmap_mutex

2023-05-09 Thread Juan Quintela

From: Lukas Straub 

This is not required, colo_flush_ram_cache does not run concurrently
with the multifd threads since the cache is only flushed after
everything has been received. But it makes me more comfortable.

This will be used in the next commits to add colo support to multifd.

Signed-off-by: Lukas Straub 
Reviewed-by: Juan Quintela 
Message-Id: 
<35cb23ba854151d38a31e3a5c8a1020e4283cb4a.1683572883.git.lukasstra...@web.de>
Signed-off-by: Juan Quintela 
---
 migration/ram.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/migration/ram.c b/migration/ram.c
index 0346c1c1ed..3fa720dad9 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3814,6 +3814,7 @@ void colo_flush_ram_cache(void)
 unsigned long offset = 0;
 
 memory_global_dirty_log_sync();
+qemu_mutex_lock(_state->bitmap_mutex);
 WITH_RCU_READ_LOCK_GUARD() {
 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
 ramblock_sync_dirty_bitmap(ram_state, block);
@@ -3848,6 +3849,7 @@ void colo_flush_ram_cache(void)
 }
 }
 }
+qemu_mutex_unlock(_state->bitmap_mutex);
 trace_colo_flush_ram_cache_end();
 }
 
-- 
2.40.0

[PULL 00/10] Migration 20230509 patches

2023-05-09 Thread Juan Quintela

The following changes since commit 271477b59e723250f17a7e20f139262057921b6a:

  Merge tag 'compression-code-pull-request' of 
https://gitlab.com/juan.quintela/qemu into staging (2023-05-08 20:38:05 +0100)

are available in the Git repository at:

  https://gitlab.com/juan.quintela/qemu.git tags/migration-20230509-pull-request

for you to fetch changes up to 5f43d297bc2b9530805ad8602c6e2ea284b08628:

  migration: block incoming colo when capability is disabled (2023-05-09 
20:52:21 +0200)


Migration Pull request (20230509 vintage)

Hi
In this PULL request:
- 1st part of colo support for multifd (lukas)
- 1st part of disabling colo option (vladimir)

Please, apply.



Lukas Straub (3):
  ram: Add public helper to set colo bitmap
  ram: Let colo_flush_ram_cache take the bitmap_mutex
  multifd: Add the ramblock to MultiFDRecvParams

Vladimir Sementsov-Ogievskiy (7):
  block/meson.build: prefer positive condition for replication
  colo: make colo_checkpoint_notify static and provide simpler API
  build: move COLO under CONFIG_REPLICATION
  migration: drop colo_incoming_thread from MigrationIncomingState
  migration: process_incoming_migration_co: simplify code flow around
ret
  migration: disallow change capabilities in COLO state
  migration: block incoming colo when capability is disabled

 block/meson.build  |  2 +-
 docs/COLO-FT.txt   |  1 +
 hmp-commands.hx|  2 ++
 include/migration/colo.h   |  9 +-
 migration/colo.c   | 57 +++---
 migration/meson.build  |  6 ++--
 migration/migration-hmp-cmds.c |  2 ++
 migration/migration.c  | 35 ++---
 migration/migration.h  |  2 --
 migration/multifd.c| 11 +++
 migration/multifd.h|  2 ++
 migration/options.c|  6 ++--
 migration/ram.c| 19 ++--
 migration/ram.h|  1 +
 qapi/migration.json|  9 --
 stubs/colo.c   | 39 +++
 stubs/meson.build  |  1 +
 17 files changed, 131 insertions(+), 73 deletions(-)
 create mode 100644 stubs/colo.c

-- 
2.40.0

[PULL 04/10] block/meson.build: prefer positive condition for replication

2023-05-09 Thread Juan Quintela

From: Vladimir Sementsov-Ogievskiy 

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Lukas Straub 
Reviewed-by: Zhang Chen 
Message-Id: <20230428194928.1426370-2-vsement...@yandex-team.ru>
Signed-off-by: Juan Quintela 
---
 block/meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/meson.build b/block/meson.build
index 382bec0e7d..b9a72e219b 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -84,7 +84,7 @@ block_ss.add(when: 'CONFIG_WIN32', if_true: 
files('file-win32.c', 'win32-aio.c')
 block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, 
iokit])
 block_ss.add(when: libiscsi, if_true: files('iscsi-opts.c'))
 block_ss.add(when: 'CONFIG_LINUX', if_true: files('nvme.c'))
-if not get_option('replication').disabled()
+if get_option('replication').allowed()
   block_ss.add(files('replication.c'))
 endif
 block_ss.add(when: libaio, if_true: files('linux-aio.c'))
-- 
2.40.0

[PULL 05/10] colo: make colo_checkpoint_notify static and provide simpler API

2023-05-09 Thread Juan Quintela

From: Vladimir Sementsov-Ogievskiy 

colo_checkpoint_notify() is mostly used in colo.c. Outside we use it
once when x-checkpoint-delay migration parameter is set. So, let's
simplify the external API to only that function - notify COLO that
parameter was set. This make external API more robust and hides
implementation details from external callers. Also this helps us to
make COLO module optional in further patch (i.e. we are going to add
possibility not build the COLO module).

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
Reviewed-by: Peter Xu 
Reviewed-by: Zhang Chen 
Message-Id: <20230428194928.1426370-3-vsement...@yandex-team.ru>
Signed-off-by: Juan Quintela 
---
 include/migration/colo.h |  9 -
 migration/colo.c | 29 ++---
 migration/options.c  |  4 +---
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/include/migration/colo.h b/include/migration/colo.h
index 5fbe1a6d5d..7ef315473e 100644
--- a/include/migration/colo.h
+++ b/include/migration/colo.h
@@ -36,6 +36,13 @@ COLOMode get_colo_mode(void);
 /* failover */
 void colo_do_failover(void);
 
-void colo_checkpoint_notify(void *opaque);
+/*
+ * colo_checkpoint_delay_set
+ *
+ * Handles change of x-checkpoint-delay migration parameter, called from
+ * migrate_params_apply() to notify COLO module about the change.
+ */
+void colo_checkpoint_delay_set(void);
+
 void colo_shutdown(void);
 #endif
diff --git a/migration/colo.c b/migration/colo.c
index 07bfa21fea..c9e0b909b9 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -65,6 +65,24 @@ static bool colo_runstate_is_stopped(void)
 return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
 }
 
+static void colo_checkpoint_notify(void *opaque)
+{
+MigrationState *s = opaque;
+int64_t next_notify_time;
+
+qemu_event_set(>colo_checkpoint_event);
+s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+next_notify_time = s->colo_checkpoint_time + migrate_checkpoint_delay();
+timer_mod(s->colo_delay_timer, next_notify_time);
+}
+
+void colo_checkpoint_delay_set(void)
+{
+if (migration_in_colo_state()) {
+colo_checkpoint_notify(migrate_get_current());
+}
+}
+
 static void secondary_vm_do_failover(void)
 {
 /* COLO needs enable block-replication */
@@ -644,17 +662,6 @@ out:
 }
 }
 
-void colo_checkpoint_notify(void *opaque)
-{
-MigrationState *s = opaque;
-int64_t next_notify_time;
-
-qemu_event_set(>colo_checkpoint_event);
-s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
-next_notify_time = s->colo_checkpoint_time + migrate_checkpoint_delay();
-timer_mod(s->colo_delay_timer, next_notify_time);
-}
-
 void migrate_start_colo_process(MigrationState *s)
 {
 qemu_mutex_unlock_iothread();
diff --git a/migration/options.c b/migration/options.c
index 2e759cc306..9d92b15b76 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -1253,9 +1253,7 @@ static void migrate_params_apply(MigrateSetParameters 
*params, Error **errp)
 
 if (params->has_x_checkpoint_delay) {
 s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
-if (migration_in_colo_state()) {
-colo_checkpoint_notify(s);
-}
+colo_checkpoint_delay_set();
 }
 
 if (params->has_block_incremental) {
-- 
2.40.0

[PULL 01/10] ram: Add public helper to set colo bitmap

2023-05-09 Thread Juan Quintela

From: Lukas Straub 

The overhead of the mutex in non-multifd mode is negligible,
because in that case its just the single thread taking the mutex.

This will be used in the next commits to add colo support to multifd.

Signed-off-by: Lukas Straub 
Reviewed-by: Juan Quintela 
Message-Id: 
<22d83cb428f37929563155531bfb69fd8953cc61.1683572883.git.lukasstra...@web.de>
Signed-off-by: Juan Quintela 
---
 migration/ram.c | 17 ++---
 migration/ram.h |  1 +
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index f78e9912cd..0346c1c1ed 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3408,6 +3408,18 @@ static ram_addr_t 
host_page_offset_from_ram_block_offset(RAMBlock *block,
 return ((uintptr_t)block->host + offset) & (block->page_size - 1);
 }
 
+void colo_record_bitmap(RAMBlock *block, ram_addr_t *normal, uint normal_num)
+{
+qemu_mutex_lock(_state->bitmap_mutex);
+for (int i = 0; i < normal_num; i++) {
+ram_addr_t offset = normal[i];
+ram_state->migration_dirty_pages += !test_and_set_bit(
+offset >> TARGET_PAGE_BITS,
+block->bmap);
+}
+qemu_mutex_unlock(_state->bitmap_mutex);
+}
+
 static inline void *colo_cache_from_block_offset(RAMBlock *block,
  ram_addr_t offset, bool record_bitmap)
 {
@@ -3425,9 +3437,8 @@ static inline void *colo_cache_from_block_offset(RAMBlock 
*block,
 * It help us to decide which pages in ram cache should be flushed
 * into VM's RAM later.
 */
-if (record_bitmap &&
-!test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
-ram_state->migration_dirty_pages++;
+if (record_bitmap) {
+colo_record_bitmap(block, , 1);
 }
 return block->colo_cache + offset;
 }
diff --git a/migration/ram.h b/migration/ram.h
index 6fffbeb5f1..887d1fbae6 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -82,6 +82,7 @@ int colo_init_ram_cache(void);
 void colo_flush_ram_cache(void);
 void colo_release_ram_cache(void);
 void colo_incoming_start_dirty_log(void);
+void colo_record_bitmap(RAMBlock *block, ram_addr_t *normal, uint normal_num);
 
 /* Background snapshot */
 bool ram_write_tracking_available(void);
-- 
2.40.0

RE: [PATCH v3 3/6] Hexagon: add core gdbstub xml data for LLDB

2023-05-09 Thread Taylor Simpson




> -Original Message-
> From: Matheus Tavares Bernardino 
> Sent: Thursday, May 4, 2023 10:38 AM
> To: qemu-devel@nongnu.org
> Cc: alex.ben...@linaro.org; Brian Cain ;
> f4...@amsat.org; peter.mayd...@linaro.org; Taylor Simpson
> ; phi...@linaro.org;
> richard.hender...@linaro.org; Laurent Vivier 
> Subject: [PATCH v3 3/6] Hexagon: add core gdbstub xml data for LLDB
> 
> Signed-off-by: Matheus Tavares Bernardino 
> ---
>  MAINTAINERS|  1 +
>  configs/targets/hexagon-linux-user.mak |  1 +
>  target/hexagon/cpu.c   |  3 +-
>  gdb-xml/hexagon-core.xml   | 84 ++
>  4 files changed, 88 insertions(+), 1 deletion(-)  create mode 100644 gdb-
> xml/hexagon-core.xml

Reviewed-by: Taylor Simpson

Re: [PATCH v5 00/21] block: remove aio_disable_external() API

2023-05-09 Thread Kevin Wolf

Am 04.05.2023 um 21:53 hat Stefan Hajnoczi geschrieben:
> v5:
> - Use atomic accesses for in_flight counter in vhost-user-server.c [Kevin]
> - Stash SCSIDevice id/lun values for VIRTIO_SCSI_T_TRANSPORT_RESET event
>   before unrealizing the SCSIDevice [Kevin]
> - Keep vhost-user-blk export .detach() callback so ctx is set to NULL [Kevin]
> - Narrow BdrvChildClass and BlockDriver drained_{begin/end/poll} callbacks 
> from
>   IO_OR_GS_CODE() to GLOBAL_STATE_CODE() [Kevin]
> - Include Kevin's "block: Fix use after free in blockdev_mark_auto_del()" to
>   fix a latent bug that was exposed by this series
> 
> v4:
> - Remove external_disable_cnt variable [Philippe]
> - Add Patch 1 to fix assertion failure in .drained_end() -> 
> blk_get_aio_context()
> v3:
> - Resend full patch series. v2 was sent in the middle of a git rebase and was
>   missing patches. [Eric]
> - Apply Reviewed-by tags.
> v2:
> - Do not rely on BlockBackend request queuing, implement .drained_begin/end()
>   instead in xen-block, virtio-blk, and virtio-scsi [Paolo]
> - Add qdev_is_realized() API [Philippe]
> - Add patch to avoid AioContext lock around blk_exp_ref/unref() [Paolo]
> - Add patch to call .drained_begin/end() from main loop thread to simplify
>   callback implementations
> 
> The aio_disable_external() API temporarily suspends file descriptor monitoring
> in the event loop. The block layer uses this to prevent new I/O requests being
> submitted from the guest and elsewhere between bdrv_drained_begin() and
> bdrv_drained_end().
> 
> While the block layer still needs to prevent new I/O requests in drained
> sections, the aio_disable_external() API can be replaced with
> .drained_begin/end/poll() callbacks that have been added to BdrvChildClass and
> BlockDevOps.
> 
> This newer .bdrained_begin/end/poll() approach is attractive because it works
> without specifying a specific AioContext. The block layer is moving towards
> multi-queue and that means multiple AioContexts may be processing I/O
> simultaneously.
> 
> The aio_disable_external() was always somewhat hacky. It suspends all file
> descriptors that were registered with is_external=true, even if they have
> nothing to do with the BlockDriverState graph nodes that are being drained.
> It's better to solve a block layer problem in the block layer than to have an
> odd event loop API solution.
> 
> The approach in this patch series is to implement BlockDevOps
> .drained_begin/end() callbacks that temporarily stop file descriptor handlers.
> This ensures that new I/O requests are not submitted in drained sections.

Patches 2-16: Reviewed-by: Kevin Wolf

command line, guest console output missing from avocado log

2023-05-09 Thread Peter Maydell

I just noticed that the guest console output seems to no longer
be in the avocado log file. Can it be reinstated, please?
The console logs are typically the most useful clue to "why did this
test fail" and without it you're just guessing in the dark...
The details of what QEMU command line avocado is running
also seem to have vanished : that also is among the most
useful items of information to have in the log.

thanks
-- PMM

Re: [PATCH v5 05/21] virtio-scsi: stop using aio_disable_external() during unplug

2023-05-09 Thread Kevin Wolf

Am 04.05.2023 um 21:53 hat Stefan Hajnoczi geschrieben:
> This patch is part of an effort to remove the aio_disable_external()
> API because it does not fit in a multi-queue block layer world where
> many AioContexts may be submitting requests to the same disk.
> 
> The SCSI emulation code is already in good shape to stop using
> aio_disable_external(). It was only used by commit 9c5aad84da1c
> ("virtio-scsi: fixed virtio_scsi_ctx_check failed when detaching scsi
> disk") to ensure that virtio_scsi_hotunplug() works while the guest
> driver is submitting I/O.
> 
> Ensure virtio_scsi_hotunplug() is safe as follows:
> 
> 1. qdev_simple_device_unplug_cb() -> qdev_unrealize() ->
>device_set_realized() calls qatomic_set(>realized, false) so
>that future scsi_device_get() calls return NULL because they exclude
>SCSIDevices with realized=false.
> 
>That means virtio-scsi will reject new I/O requests to this
>SCSIDevice with VIRTIO_SCSI_S_BAD_TARGET even while
>virtio_scsi_hotunplug() is still executing. We are protected against
>new requests!
> 
> 2. scsi_device_unrealize() already contains a call to

I think you mean scsi_qdev_unrealize(). Can be fixed while applying.

>scsi_device_purge_requests() so that in-flight requests are cancelled
>synchronously. This ensures that no in-flight requests remain once
>qdev_simple_device_unplug_cb() returns.
> 
> Thanks to these two conditions we don't need aio_disable_external()
> anymore.
> 
> Cc: Zhengui Li 
> Reviewed-by: Paolo Bonzini 
> Reviewed-by: Daniil Tatianin 
> Signed-off-by: Stefan Hajnoczi 

Kevin

Re: [PATCH v4 09/10] migration: disallow change capabilities in COLO state

2023-05-09 Thread Juan Quintela

Vladimir Sementsov-Ogievskiy  wrote:
> COLO is not listed as running state in migrate_is_running(), so, it's
> theoretically possible to disable colo capability in COLO state and the
> unexpected error in migration_iteration_finish() is reachable.
>
> Let's disallow that in qmp_migrate_set_capabilities. Than the error
> becomes absolutely unreachable: we can get into COLO state only with
> enabled capability and can't disable it while we are in COLO state. So
> substitute the error by simple assertion.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy 

Reviewed-by: Juan Quintela

Re: [PATCH] hw/mips/malta: Fix the malta machine on big endian hosts

2023-05-09 Thread Peter Maydell

On Thu, 13 Apr 2023 at 17:26, Peter Maydell  wrote:
>
> On Thu, 13 Apr 2023 at 17:08, Michael Tokarev  wrote:
> >
> > 30.03.2023 18:26, Thomas Huth wrote:
> > > Booting a Linux kernel with the malta machine is currently broken
> > > on big endian hosts. The cpu_to_gt32 macro wants to byteswap a value
> > > for little endian targets only, but uses the wrong way to do this:
> > > cpu_to_[lb]e32 works the other way round on big endian hosts! Fix
> > > it by using the same ways on both, big and little endian hosts.
> > >
> > > Fixes: 0c8427baf0 ("hw/mips/malta: Use bootloader helper to set BAR 
> > > registers")
> > > Signed-off-by: Thomas Huth 
> >
> > Has this been forgotten?
>
> Looks like it. Too late for 8.0 now (and it wasn't a regression
> since it looks like it was broken in 7.2 as well); will have to
> be fixed in 8.1.

Philippe -- looks like this patch still hasn't been queued ?
(It could probably use a Cc: qemu-sta...@nongnu.org at this point.)

It can have my
Reviewed-by: Peter Maydell 

thanks
-- PMM

[PATCH] Remove test_vshuff from hvx_misc tests

2023-05-09 Thread Marco Liebel

test_vshuff checks that the vshuff instruction works correctly when
both vector registers are the same. Using vshuff in this way is
undefined and will be rejected by the compiler in a future version of
the toolchain.

Signed-off-by: Marco Liebel 
---
 tests/tcg/hexagon/hvx_misc.c | 45 
 1 file changed, 45 deletions(-)

diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c
index d0e64e035f..bc4e76d7f0 100644
--- a/tests/tcg/hexagon/hvx_misc.c
+++ b/tests/tcg/hexagon/hvx_misc.c
@@ -342,49 +342,6 @@ static void test_vsubuwsat_dv(void)
 check_output_w(__LINE__, 2);
 }
 
-static void test_vshuff(void)
-{
-/* Test that vshuff works when the two operands are the same register */
-const uint32_t splat = 0x089be55c;
-const uint32_t shuff = 0x454fa926;
-MMVector v0, v1;
-
-memset(expect, 0x12, sizeof(MMVector));
-memset(output, 0x34, sizeof(MMVector));
-
-asm volatile("v25 = vsplat(%0)\n\t"
- "vshuff(v25, v25, %1)\n\t"
- "vmem(%2 + #0) = v25\n\t"
- : /* no outputs */
- : "r"(splat), "r"(shuff), "r"(output)
- : "v25", "memory");
-
-/*
- * The semantics of Hexagon are the operands are pass-by-value, so create
- * two copies of the vsplat result.
- */
-for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) {
-v0.uw[i] = splat;
-v1.uw[i] = splat;
-}
-/* Do the vshuff operation */
-for (int offset = 1; offset < MAX_VEC_SIZE_BYTES; offset <<= 1) {
-if (shuff & offset) {
-for (int k = 0; k < MAX_VEC_SIZE_BYTES; k++) {
-if (!(k & offset)) {
-uint8_t tmp = v0.ub[k];
-v0.ub[k] = v1.ub[k + offset];
-v1.ub[k + offset] = tmp;
-}
-}
-}
-}
-/* Put the result in the expect buffer for verification */
-expect[0] = v1;
-
-check_output_b(__LINE__, 1);
-}
-
 static void test_load_tmp_predicated(void)
 {
 void *p0 = buffer0;
@@ -489,8 +446,6 @@ int main()
 test_vadduwsat();
 test_vsubuwsat_dv();
 
-test_vshuff();
-
 test_load_tmp_predicated();
 test_load_cur_predicated();
 
-- 
2.25.1

Re: [PATCH v4 04/10] configure: add --disable-colo-proxy option

2023-05-09 Thread Juan Quintela

"Zhang, Chen"  wrote:
>> -Original Message-
>> From: Vladimir Sementsov-Ogievskiy 
>> Sent: Saturday, April 29, 2023 3:49 AM
>> To: qemu-devel@nongnu.org
>> Cc: lukasstra...@web.de; quint...@redhat.com; Zhang, Chen
>> ; vsement...@yandex-team.ru; Paolo Bonzini
>> ; Marc-André Lureau
>> ; Daniel P. Berrangé
>> ; Thomas Huth ; Philippe
>> Mathieu-Daudé ; Jason Wang 
>> Subject: [PATCH v4 04/10] configure: add --disable-colo-proxy option
>> 
>> Add option to not build filter-mirror, filter-rewriter and colo-compare when
>> they are not needed.
>
> Typo: This patch still build the filter-mirror/filter-redirector in 
> filter-mirror.c.
> Please remove the "filter-mirror" here.
> Other code look good to me.

Vladimir, I was doing this myself, with the bit attached.

But then I noticed that one needs to also disable
tests/qtest/test-filter-mirror and test-filter-rewriter.

Can you resend with that fixed?  Or I am missing something more
fundamental.

Thanks, Juan.

>> --- a/net/meson.build
>> +++ b/net/meson.build
>> @@ -1,13 +1,10 @@
>>  softmmu_ss.add(files(
>>'announce.c',
>>'checksum.c',
>> -  'colo-compare.c',
>> -  'colo.c',
>>'dump.c',
>>'eth.c',
>>'filter-buffer.c',
>>'filter-mirror.c',
>> -  'filter-rewriter.c',
>>'filter.c',
>>'hub.c',
>>'net-hmp-cmds.c',
>> @@ -19,6 +16,16 @@ softmmu_ss.add(files(
>>'util.c',
>>  ))
>> 
>> +if get_option('replication').allowed() or \
>> +get_option('colo_proxy').allowed()
>> +  softmmu_ss.add(files('colo-compare.c'))
>> +  softmmu_ss.add(files('colo.c'))
>> +endif
>> +
>> +if get_option('colo_proxy').allowed()
>> +  softmmu_ss.add(files('filter-rewriter.c'))
>> +endif
>> +
>>  softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('filter-replay.c'))

This is the change needed, right?

diff --git a/net/meson.build b/net/meson.build
index 6f4ecde57f..e623bb9acb 100644
--- a/net/meson.build
+++ b/net/meson.build
@@ -4,7 +4,6 @@ softmmu_ss.add(files(
   'dump.c',
   'eth.c',
   'filter-buffer.c',
-  'filter-mirror.c',
   'filter.c',
   'hub.c',
   'net-hmp-cmds.c',
@@ -23,7 +22,7 @@ if get_option('replication').allowed() or \
 endif
 
 if get_option('colo_proxy').allowed()
-  softmmu_ss.add(files('filter-rewriter.c'))
+  softmmu_ss.add(files('filter-rewriter.c', 'filter-mirror.c'))
 endif

Re: [PATCH v5 00/21] block: remove aio_disable_external() API

2023-05-09 Thread Kevin Wolf

Am 09.05.2023 um 19:51 hat Stefan Hajnoczi geschrieben:
> On Thu, May 04, 2023 at 11:44:42PM +0200, Kevin Wolf wrote:
> > Am 04.05.2023 um 21:53 hat Stefan Hajnoczi geschrieben:
> > > v5:
> > > - Use atomic accesses for in_flight counter in vhost-user-server.c [Kevin]
> > > - Stash SCSIDevice id/lun values for VIRTIO_SCSI_T_TRANSPORT_RESET event
> > >   before unrealizing the SCSIDevice [Kevin]
> > > - Keep vhost-user-blk export .detach() callback so ctx is set to NULL 
> > > [Kevin]
> > > - Narrow BdrvChildClass and BlockDriver drained_{begin/end/poll} 
> > > callbacks from
> > >   IO_OR_GS_CODE() to GLOBAL_STATE_CODE() [Kevin]
> > > - Include Kevin's "block: Fix use after free in blockdev_mark_auto_del()" 
> > > to
> > >   fix a latent bug that was exposed by this series
> > 
> > I only just finished reviewing v4 when you had already sent v5, but it
> > hadn't arrived yet. I had a few more comments on what are now patches
> > 17, 18, 19 and 21 in v5. I think they all still apply.
> 
> I'm not sure which comments from v4 still apply. In my email client all
> your replies were already read when I sent v5.

Yes, but I added some more replies after you had sent v5 (and before I
fetched mail again to actually see v5).

> Maybe you can share the Message-Id of something I still need to address?

I thought the patch numbers identified them and were easier, but sure:

Message-ID: 
Message-ID: 
Message-ID: 
Message-ID: 

Kevin


signature.asc
Description: PGP signature

Re: [PATCH v4 10/10] migration: block incoming colo when capability is disabled

2023-05-09 Thread Juan Quintela

Vladimir Sementsov-Ogievskiy  wrote:
> We generally require same set of capabilities on source and target.
> Let's require x-colo capability to use COLO on target.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy 

Reviewed-by: Juan Quintela 

I will update the docs as said by lukas.

> ---
>  migration/migration.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index 8c5bbf3e94..5e162c0622 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -395,6 +395,12 @@ int migration_incoming_enable_colo(void)
>  return -ENOTSUP;
>  #endif
>  
> +if (!migrate_colo()) {
> +error_report("ENABLE_COLO command come in migration stream, but 
> c-colo "
> + "capability is not set");
> +return -EINVAL;
> +}
> +
>  if (ram_block_discard_disable(true)) {
>  error_report("COLO: cannot disable RAM discard");
>  return -EBUSY;

Re: [PATCH v4 09/10] migration: disallow change capabilities in COLO state

2023-05-09 Thread Juan Quintela

"Zhang, Chen"  wrote:
>> -Original Message-
>> From: Vladimir Sementsov-Ogievskiy 
>> Sent: Thursday, May 4, 2023 4:23 PM
>> To: Zhang, Chen ; qemu-devel@nongnu.org
>> Cc: lukasstra...@web.de; quint...@redhat.com; Peter Xu
>> ; Leonardo Bras 
>> Subject: Re: [PATCH v4 09/10] migration: disallow change capabilities in COLO
>> state
>> 
>> On 04.05.23 11:09, Zhang, Chen wrote:
>> >
>> >
>> >> -Original Message-
>> >> From: Vladimir Sementsov-Ogievskiy 
>> >> Sent: Saturday, April 29, 2023 3:49 AM
>> >> To: qemu-devel@nongnu.org
>> >> Cc: lukasstra...@web.de; quint...@redhat.com; Zhang, Chen
>> >> ; vsement...@yandex-team.ru; Peter Xu
>> >> ; Leonardo Bras 
>> >> Subject: [PATCH v4 09/10] migration: disallow change capabilities in
>> >> COLO state
>> >>
>> >> COLO is not listed as running state in migrate_is_running(), so, it's
>> >> theoretically possible to disable colo capability in COLO state and
>> >> the unexpected error in migration_iteration_finish() is reachable.
>> >>
>> >> Let's disallow that in qmp_migrate_set_capabilities. Than the error
>> >> becomes absolutely unreachable: we can get into COLO state only with
>> >> enabled capability and can't disable it while we are in COLO state.
>> >> So substitute the error by simple assertion.
>> >>
>> >> Signed-off-by: Vladimir Sementsov-Ogievskiy
>> >> 
>> >> ---
>> >>   migration/migration.c | 5 +
>> >>   migration/options.c   | 2 +-
>> >>   2 files changed, 2 insertions(+), 5 deletions(-)
>> >>
>> >> diff --git a/migration/migration.c b/migration/migration.c index
>> >> 0d912ee0d7..8c5bbf3e94 100644
>> >> --- a/migration/migration.c
>> >> +++ b/migration/migration.c
>> >> @@ -2751,10 +2751,7 @@ static void
>> >> migration_iteration_finish(MigrationState *s)
>> >>   runstate_set(RUN_STATE_POSTMIGRATE);
>> >>   break;
>> >>   case MIGRATION_STATUS_COLO:
>> >> -if (!migrate_colo()) {
>> >> -error_report("%s: critical error: calling COLO code without "
>> >> - "COLO enabled", __func__);
>> >> -}
>> >> +assert(migrate_colo());
>> >>   migrate_start_colo_process(s);
>> >>   s->vm_was_running = true;
>> >>   /* Fallthrough */
>> >> diff --git a/migration/options.c b/migration/options.c index
>> >> 865a0214d8..f461d02eeb 100644
>> >> --- a/migration/options.c
>> >> +++ b/migration/options.c
>> >> @@ -598,7 +598,7 @@ void
>> >> qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
>> >>   MigrationCapabilityStatusList *cap;
>> >>   bool new_caps[MIGRATION_CAPABILITY__MAX];
>> >>
>> >> -if (migration_is_running(s->state)) {
>> >> +if (migration_is_running(s->state) || migration_in_colo_state())
>> >> + {
>> >
>> > Make the "MIGRATION_STATUS_COLO" into the " migration_is_running()"
>> is a better way?
>> 
>> I wasn't sure that that's correct.. migration_is_running() is used in several
>> places, to do so, I'd have to analyze them all.
>
> Actually, when running in the "MIGRATION_STATUS_COLO" means QEMU can not
> do a normal migration at the same time.
> Juan have any comments here?

My understanding of colo is pretty limited, but my mind explodes just
trying to think how many things we would have to check/do to do a
migration while in colo state.

So I guess you are right that we can't be in both at the same time.

Later, Juan.

Re: [PATCH v4 03/10] build: move COLO under CONFIG_REPLICATION

2023-05-09 Thread Juan Quintela

Vladimir Sementsov-Ogievskiy  wrote:
> We don't allow to use x-colo capability when replication is not
> configured. So, no reason to build COLO when replication is disabled,
> it's unusable in this case.
>
> Note also that the check in migrate_caps_check() is not the only
> restriction: some functions in migration/colo.c will just abort if
> called with not defined CONFIG_REPLICATION, for example:
>
> migration_iteration_finish()
>case MIGRATION_STATUS_COLO:
>migrate_start_colo_process()
>colo_process_checkpoint()
>abort()
>
> It could probably make sense to have possibility to enable COLO without
> REPLICATION, but this requires deeper audit of colo & replication code,
> which may be done later if needed.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> Acked-by: Dr. David Alan Gilbert 

Reviewed-by: Juan Quintela

[PATCH] multifd: Add colo support

2023-05-09 Thread Juan Quintela

From: Lukas Straub 

Like in the normal ram_load() path, put the received pages into the
colo cache and mark the pages in the bitmap so that they will be
flushed to the guest later.

Signed-off-by: Lukas Straub 

---

Hi Lukas

What about this instead of your other three patches?  I think it is
clearer, and I don't think that we are going to have anything else
that is going to hook there anytime soon.

Notice that I put CONFIG_COLO waiting for Vladimir changes to get in
before I merge this.

Notice also that I "lost" the line:

  p->host = p->block->host;

In the error case.  But in that case we are aborting the migration, so
we don't care.

Can you check if it works for you?
Here it compiles, so it must be perfect.

Thanks, Juan.
---
 migration/meson.build|  2 +-
 migration/multifd-colo.c | 49 
 migration/multifd-colo.h | 24 
 3 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 migration/multifd-colo.c
 create mode 100644 migration/multifd-colo.h

diff --git a/migration/meson.build b/migration/meson.build
index 75de868bb7..c9db40d4d4 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -23,7 +23,7 @@ softmmu_ss.add(files(
   'migration.c',
   'multifd.c',
   'multifd-zlib.c',
-  'multifd-zlib.c',
+  'multifd-colo.c',
   'ram-compress.c',
   'options.c',
   'postcopy-ram.c',
diff --git a/migration/multifd-colo.c b/migration/multifd-colo.c
new file mode 100644
index 00..10fa1467fa
--- /dev/null
+++ b/migration/multifd-colo.c
@@ -0,0 +1,49 @@
+/*
+ * multifd colo implementation
+ *
+ * Copyright (c) Lukas Straub 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "exec/target_page.h"
+#include "exec/ramblock.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "ram.h"
+#include "multifd.h"
+#include "options.h"
+#include "io/channel-socket.h"
+#include "migration/colo.h"
+#include "multifd-colo.h"
+
+void multifd_colo_prepare_recv_pages(MultiFDRecvParams *p)
+{
+if (migrate_colo()) {
+/*
+ * While we're still in precopy mode, we copy received pages to both 
guest
+ * and cache. No need to set dirty bits, since guest and cache memory 
are
+ * in sync.
+ */
+if (migration_incoming_in_colo_state()) {
+colo_record_bitmap(p->block, p->normal, p->normal_num);
+}
+p->host = p->block->colo_cache;
+}
+}
+
+void multifd_colo_process_recv_pages(MultiFDRecvParams *p)
+{
+if (migrate_colo()) {
+if (!migration_incoming_in_colo_state()) {
+for (int i = 0; i < p->normal_num; i++) {
+void *guest = p->block->host + p->normal[i];
+void *cache = p->host + p->normal[i];
+memcpy(guest, cache, p->page_size);
+}
+}
+p->host = p->block->host;
+}
+}
diff --git a/migration/multifd-colo.h b/migration/multifd-colo.h
new file mode 100644
index 00..1636c617fc
--- /dev/null
+++ b/migration/multifd-colo.h
@@ -0,0 +1,24 @@
+/*
+ * multifd colo header
+ *
+ * Copyright (c) Lukas Straub 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_MIGRATION_MULTIFD_COLO_H
+#define QEMU_MIGRATION_MULTIFD_COLO_H
+
+#ifndef CONFIG_COLO
+
+void multifd_colo_prepare_recv_pages(MultiFDRecvParams *p);
+void multifd_colo_process_recv_pages(MultiFDRecvParams *p);
+
+#else
+
+static inline void multifd_colo_prepare_recv_pages(MultiFDRecvParams *p) {}
+static inline void multifd_colo_process_recv_pages(MultiFDRecvParams *p) {}
+
+#endif /* CONFIG_COLO */
+#endif
-- 
2.40.0

Re: [PATCH] make: clean after distclean deletes source files

2023-05-09 Thread Steven Sistare

Any takers?  I believe this patch is correct and clean.
Examples, run in the top level of a git tree:

$ configure ...
$ make clean
... cleans ...

$ make clean
... cleans ...

$ make distclean
... cleans ...

$ make distclean
Makefile:180: *** Please call configure before running make.  Stop.

$ make clean
Makefile:180: *** Please call configure before running make.  Stop.

# unchecked goals still work
$ make cscope
cscope  Remove old cscope files
cscope  Create file list
cscope  Re-index .

$ configure ...
$ make clean
... cleans ...

- Steve

On 4/19/2023 9:08 AM, Steve Sistare wrote:
> Run 'make distclean' in a tree, and GNUmakefile is removed.
> But, GNUmakefile is where we change directory to build.
> Run 'make distclean' or 'make clean' again, and Makefile applies
> the clean actions, such as this one, at the top level of the tree.
> For example, it removes the .d source files in 'meson/test cases/d/*/*.d'.
> 
> find . \( -name '*.so' -o -name '*.dll' -o \
>   -name '*.[oda]' -o -name '*.gcno' \) -type f \
> ! -path ./roms/edk2/ArmPkg/Library/GccLto/liblto-aarch64.a \
> ! -path ./roms/edk2/ArmPkg/Library/GccLto/liblto-arm.a \
> -exec rm {} +
> 
> To fix, remove clean and distclean from UNCHECKED_GOALS, so those targets
> are "checked", meaning that configure must be run before make.  However,
> the check action does not trigger, because clean does not depend on
> config-host.mak, so change the action to simply throw an error.
> 
> Signed-off-by: Steve Sistare 
> ---
>  Makefile | 6 ++
>  1 file changed, 2 insertions(+), 4 deletions(-)
> 
> diff --git a/Makefile b/Makefile
> index e421f8a..30d61f8 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -26,7 +26,7 @@ quiet-command-run = $(if $(V),,$(if $2,printf "  %-7s %s\n" 
> $2 $3 && ))$1
>  quiet-@ = $(if $(V),,@)
>  quiet-command = $(quiet-@)$(call quiet-command-run,$1,$2,$3)
>  
> -UNCHECKED_GOALS := %clean TAGS cscope ctags dist \
> +UNCHECKED_GOALS := TAGS cscope ctags dist \
>  help check-help print-% \
>  docker docker-% vm-help vm-test vm-build-%
>  
> @@ -176,10 +176,8 @@ plugins:
>  endif # $(CONFIG_PLUGIN)
>  
>  else # config-host.mak does not exist
> -config-host.mak:
>  ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if 
> $(MAKECMDGOALS),,fail))
> - @echo "Please call configure before running make!"
> - @exit 1
> +$(error Please call configure before running make)
>  endif
>  endif # config-host.mak does not exist
>

Re: [PATCH 00/11] Fix qemu_strtosz() read-out-of-bounds

2023-05-09 Thread Hanna Czenczek


On 08.05.23 22:03, Eric Blake wrote:

This series blew up in my face when Hanna first pointed me to
https://gitlab.com/qemu-project/qemu/-/issues/1629

Basically, 'qemu-img dd bs=9.9e999' killed a sanitized build because
of a read-out-of-bounds (".9e999" parses as infinity, but qemu_strtosz
wasn't expecting ERANGE failure).

The overall diffstate is big, mainly because the unit tests needed a
LOT of work before I felt comfortable tweaking semantics in something
that is so essential to command-line and QMP parsing.

Eric Blake (11):
   test-cutils: Avoid g_assert in unit tests
   test-cutils: Use g_assert_cmpuint where appropriate
   test-cutils: Test integral qemu_strto* value on failures
   test-cutils: Add coverage of qemu_strtod
   test-cutils: Prepare for upcoming semantic change in qemu_strtosz
   test-cutils: Add more coverage to qemu_strtosz
   numa: Check for qemu_strtosz_MiB error
   cutils: Set value in all qemu_strtosz* error paths
   cutils: Set value in all integral qemu_strto* error paths
   cutils: Improve qemu_strtod* error paths
   cutils: Improve qemu_strtosz handling of fractions

  hw/core/numa.c   |   11 +-
  tests/unit/test-cutils.c | 1213 ++
  util/cutils.c|  180 --
  3 files changed, 1090 insertions(+), 314 deletions(-)


Patches 1 – 5, 7 – 10:

Reviewed-by: Hanna Czenczek

Re: [PATCH 11/11] cutils: Improve qemu_strtosz handling of fractions

2023-05-09 Thread Hanna Czenczek


On 08.05.23 22:03, Eric Blake wrote:

We have several limitations and bugs worth fixing; they are
inter-related enough that it is not worth splitting this patch into
smaller pieces:

* ".5k" should work to specify 512, just as "0.5k" does
* "1.k" and "1." + "9"*50 + "k" should both produce the same
   result of 2048 after rounding
* "1." + "0"*350 + "1B" should not be treated the same as "1.0B";
   underflow in the fraction should not be lost
* "7.99e99" and "7.99e999" look similar, but our code was doing a
   read-out-of-bounds on the latter because it was not expecting ERANGE
   due to overflow. While we document that scientific notation is not
   supported, and the previous patch actually fixed
   qemu_strtod_finite() to no longer return ERANGE overflows, it is
   easier to pre-filter than to try and determine after the fact if
   strtod() consumed more than we wanted.  Note that this is a
   low-level semantic change (when endptr is not NULL, we can now
   successfully parse with a scale of 'E' and then report trailing
   junk, instead of failing outright with EINVAL); but an earlier
   commit already argued that this is not a high-level semantic change
   since the only caller passing in a non-NULL endptr also checks that
   the tail is whitespace-only.

Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1629
Signed-off-by: Eric Blake 
---
  tests/unit/test-cutils.c | 51 +++
  util/cutils.c| 89 
  2 files changed, 88 insertions(+), 52 deletions(-)


[...]


diff --git a/util/cutils.c b/util/cutils.c
index 0e056a27a44..d1dfbc69d16 100644
--- a/util/cutils.c
+++ b/util/cutils.c


[...]


@@ -246,27 +244,66 @@ static int do_strtosz(const char *nptr, const char **end,
  retval = -EINVAL;
  goto out;
  }
-} else if (*endptr == '.') {
+} else if (*endptr == '.' || (endptr == nptr && strchr(nptr, '.'))) {


What case is there where we have a fraction but *endptr != '.'?


  /*
   * Input looks like a fraction.  Make sure even 1.k works
- * without fractional digits.  If we see an exponent, treat
- * the entire input as invalid instead.
+ * without fractional digits.  strtod tries to treat 'e' as an
+ * exponent, but we want to treat it as a scaling suffix;
+ * doing this requires modifying a copy of the fraction.
   */
-double fraction;
+double fraction = 0.0;

-f = endptr;
-retval = qemu_strtod_finite(f, , );
-if (retval) {
+if (retval == 0 && *endptr == '.' && !isdigit(endptr[1])) {
+/* If we got here, we parsed at least one digit already. */
  endptr++;
-} else if (memchr(f, 'e', endptr - f) || memchr(f, 'E', endptr - f)) {
-endptr = nptr;
-retval = -EINVAL;
-goto out;
  } else {
-/* Extract into a 64-bit fixed-point fraction. */
+char *e;
+const char *tail;
+g_autofree char *copy = g_strdup(endptr);
+
+e = strchr(copy, 'e');
+if (e) {
+*e = '\0';
+}
+e = strchr(copy, 'E');
+if (e) {
+*e = '\0';
+}
+/*
+ * If this is a floating point, we are guaranteed that '.'
+ * appears before any possible digits in copy.  If it is
+ * not a floating point, strtod will fail.  Either way,
+ * there is now no exponent in copy, so if it parses, we
+ * know 0.0 <= abs(result) <= 1.0 (after rounding), and
+ * ERANGE is only possible on underflow which is okay.
+ */
+retval = qemu_strtod_finite(copy, , );
+endptr += tail - copy;
+}
+
+/* Extract into a 64-bit fixed-point fraction. */
+if (fraction == 1.0) {
+if (val == UINT64_MAX) {
+retval = -ERANGE;
+goto out;
+}
+val++;
+} else if (retval == -ERANGE) {
+/* See comments above about underflow */
+valf = 1;


It doesn’t really matter because even an EiB is just 2^60, and so 1 EiB 
* 2^-64 (the resolution of our fractional part) is still less than 1, but:


DBL_MIN * 0x1p64 is 2^-(1022-64) == 2^-958, i.e. much less than 1, so 
I’d set valf to 0 here.


(If you put “.0001” into this, there won’t be an 
underflow, but the value is so small that valf ends up 0.  But if you 
put `.$(yes 0 | head -n 307 | tr -d '\n')1` into this, there will be an 
underflow, setting valf to 1, even though the value is smaller.)


Hanna


+retval = 0;
+} else {
  valf = (uint64_t)(fraction * 0x1p64);
  }
  }
+if (retval) {
+goto out;
+}
+if (memchr(nptr, '-', endptr - nptr) != NULL) {
+endptr = nptr;
+retval =

Re: [PATCH v3 0/5] virtio-gpu cleanups and obvious definitions

2023-05-09 Thread Gurchetan Singh

On Tue, May 9, 2023 at 5:43 AM Marc-André Lureau 
wrote:

> Hi
>
> On Thu, May 4, 2023 at 11:13 PM Gurchetan Singh <
> gurchetansi...@chromium.org> wrote:
>
>> From: Gurchetan Singh 
>>
>> v3 of "virtio-gpu cleanups and obvious definitions"
>>
>> https://lists.gnu.org/archive/html/qemu-devel/2023-04/msg05392.html
>>
>> All patches have been reviewed, though there was a question from
>> Bernhard Beschow about patch (3) and how it fits with the QOM:
>>
>> https://lists.gnu.org/archive/html/qemu-devel/2023-05/msg00057.html
>>
>> I go into detail in patch 3 commit message, but I think we meet
>> the requirements (which are tricky/fuzzy anyways).  Also, I think
>> this is the cleanest way to add another 3D virtgpu backend.  But
>> if anyone has other ideas, please do reply/review.
>>
>> Antonio Caggiano (1):
>>   virtio-gpu: CONTEXT_INIT feature
>>
>> Dr. David Alan Gilbert (1):
>>   virtio: Add shared memory capability
>>
>> Gurchetan Singh (3):
>>   hw/display/virtio-gpu-virgl: virtio_gpu_gl -> virtio_gpu_virgl
>>   hw/display/virtio-gpu-virgl: make GL device more library agnostic
>>   hw/display/virtio-gpu-virgl: define callbacks in realize function
>>
>>  hw/display/virtio-gpu-base.c   |   3 +
>>  hw/display/virtio-gpu-gl.c | 114 +--
>>  hw/display/virtio-gpu-virgl.c  | 137 +++--
>>  hw/virtio/virtio-pci.c |  18 +
>>  include/hw/virtio/virtio-gpu.h |  11 +--
>>  include/hw/virtio/virtio-pci.h |   4 +
>>  6 files changed, 160 insertions(+), 127 deletions(-)
>>
>> --
>> 2.40.1.521.gf1e218fcd8-goog
>>
>>
>>
> This looks fine to me:
> Reviewed-by: Marc-André Lureau 
>
> however, do you have a series rebased on top that makes use of those
> changes? (I think we may want to delay merging this one until it's actually
> needed)
>

Got it.  Bernhard actually recommended a separate virtio-gpu-rutabaga
device instead of virtio-gpu-gl.  I went with that approach:

https://lists.gnu.org/archive/html/qemu-devel/2023-05/msg01571.html
https://gitlab.freedesktop.org/gurchetansingh/qemu-gfxstream/-/commits/qemu-gfxstream-3

I'll send out a new full-featured series once testing/hack cleanups are
complete -- so no further action is needed on your part.


>
>
> --
> Marc-André Lureau
>

Re: [PATCH v5 00/21] block: remove aio_disable_external() API

2023-05-09 Thread Stefan Hajnoczi

On Thu, May 04, 2023 at 11:44:42PM +0200, Kevin Wolf wrote:
> Am 04.05.2023 um 21:53 hat Stefan Hajnoczi geschrieben:
> > v5:
> > - Use atomic accesses for in_flight counter in vhost-user-server.c [Kevin]
> > - Stash SCSIDevice id/lun values for VIRTIO_SCSI_T_TRANSPORT_RESET event
> >   before unrealizing the SCSIDevice [Kevin]
> > - Keep vhost-user-blk export .detach() callback so ctx is set to NULL 
> > [Kevin]
> > - Narrow BdrvChildClass and BlockDriver drained_{begin/end/poll} callbacks 
> > from
> >   IO_OR_GS_CODE() to GLOBAL_STATE_CODE() [Kevin]
> > - Include Kevin's "block: Fix use after free in blockdev_mark_auto_del()" to
> >   fix a latent bug that was exposed by this series
> 
> I only just finished reviewing v4 when you had already sent v5, but it
> hadn't arrived yet. I had a few more comments on what are now patches
> 17, 18, 19 and 21 in v5. I think they all still apply.

I'm not sure which comments from v4 still apply. In my email client all
your replies were already read when I sent v5.

Maybe you can share the Message-Id of something I still need to address?

Thanks,
Stefan


signature.asc
Description: PGP signature

Re: [PATCH v2 00/20] Graph locking, part 3 (more block drivers)

2023-05-09 Thread Stefan Hajnoczi

On Thu, May 04, 2023 at 01:57:30PM +0200, Kevin Wolf wrote:
> The first few patches in this series fix coroutine correctness problems
> that have existed for a while, but only actually start to make things
> fail in practice with stricter checks that we're going to introduce with
> the graph locking work.
> 
> The rest of the series makes sure that the graph lock is held in more
> block driver functions that access the children or parents list of a
> block node. This is incremental work and more patches are yet to come.
> 
> v2:
> - Rebased on current git master
> - Improved some commit messages
> - Patch 5: Added missing coroutine_fn annotations in test
> - Patch 7: Updated comments, too
> 
> I didn't remove the assertion in patch 13 yet which Stefan was
> questioning. I can remove it while applying if we decide that we really
> don't want it.

That's fine, we can leave them for now.

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature

Re: css_clear_io_interrupt() error handling

2023-05-09 Thread Halil Pasic

On Mon, 08 May 2023 11:01:55 +0200
Cornelia Huck  wrote:

> On Mon, May 08 2023, Markus Armbruster  wrote:
> 
> > css_clear_io_interrupt() aborts on unexpected ioctl() errors, and I
> > wonder whether that's appropriate.  Let's have a closer look:

Just for my understanding, was there a field problem with this code,
or is it more a theoretical (i.e. no know crashes)?

> >
> > static void css_clear_io_interrupt(uint16_t subchannel_id,
> >uint16_t subchannel_nr)
> > {
> > Error *err = NULL;
> > static bool no_clear_irq;
> > S390FLICState *fs = s390_get_flic();
> > S390FLICStateClass *fsc = s390_get_flic_class(fs);
> > int r;
> >
> > if (unlikely(no_clear_irq)) {
> > return;
> > }
> > r = fsc->clear_io_irq(fs, subchannel_id, subchannel_nr);
> > switch (r) {
> > case 0:
> > break;
> > case -ENOSYS:
> > no_clear_irq = true;
> > /*
> > * Ignore unavailability, as the user can't do anything
> > * about it anyway.
> > */
> > break;
> > default:
> > error_setg_errno(, -r, "unexpected error condition");
> > error_propagate(_abort, err);
> > }
> > }
> >
> > The default case is abort() with a liberal amount of lipstick applied.
> > Let's ignore the lipstick and focus on the abort().

Nod.

> >
> > fsc->clear_io_irq ist either qemu_s390_clear_io_flic() order
> > kvm_s390_clear_io_flic().

Right.

> >
> > Only kvm_s390_clear_io_flic() can return non-zero: -errno when ioctl()
> > fails.

Agreed, this is the case right now. This was not the case when the code
was written qemu_s390_clear_io_flic() used to be missing functionality
and always returned -ENOSYS.

> >
> > The ioctl() is KVM_SET_DEVICE_ATTR for KVM_DEV_FLIC_CLEAR_IO_IRQ with
> > subchannel_id and subchannel_nr.  I.e. we assume that this can only fail
> > with ENOSYS, und crash hard when the assumption turns out to be wrong.

Yes this is the assumption and the current behavior.

> >
> > Is this error condition a programming error?  I figure it can be one
> > only if the ioctl()'s contract promises us it cannot fail in any other
> > way unless we violate preconditions.

AFAIK and AFAIR it is indeed only possible in case of a programming error
somewhere, and this was almost certainly my intention with this code. 

For example if the future implementer of a meaningful
qemu_s390_clear_io_flic() was to decide to use a multitude of error
codes, the implementer would also have to touch this and handle those
accordingly to avoid crashes.

On the ioctl() is KVM_SET_DEVICE_ATTR for KVM_DEV_FLIC_CLEAR_IO_IRQ I'm
afraid there is no really authoritative contract, and the current
implementation, the documentation under Documentation/virt/kvm in
the Linux source tree and this code in QEMU are the de-facto contract. 

linux/Documentation/virt/kvm/api.rst says
"""
4.81 KVM_HAS_DEVICE_ATTR

:Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
 KVM_CAP_VCPU_ATTRIBUTES for vcpu device
 KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device
:Type: device ioctl, vm ioctl, vcpu ioctl
:Parameters: struct kvm_device_attr
:Returns: 0 on success, -1 on error

Errors:

  =   =
  ENXIO   The group or attribute is unknown/unsupported for this device
  or hardware support is missing.
  =   =

Tests whether a device supports a particular attribute.  A successful
return indicates the attribute is implemented.  It does not necessarily
indicate that the attribute can be read or written in the device's
current state.  "addr" is ignored.
"""

and we do check for availability and cover that via -ENOSYS.

For KVM_DEV_FLIC_CLEAR_IO_IRQ is just the following error code
documented in linux/Documentation/virt/kvm/devices/s390_flic.rst
which is to my knowledge the most authoritative source.
"""
.. note:: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a
  zero schid is specified
"""
but a look in the code will tell us that -EFAULT is also possible if the
supplied address is broken.

To sum it up, there is nothing to go wrong with the given operation, and
to my best knowledge seeing an error code on the ioctl would either
indicate a programming error on the client side (QEMU messed it up) or
there is something wrong with the kernel.

> >
> > Is the error condition fatal, i.e. continuing would be unsafe?

If the kernel is broken, probably. It is certainly unexpected.

> >
> > If it's a fatal programming error, then abort() is appropriate.
> >
> > If it's fatal, but not a programming error, we should exit(1) instead.

It might not be a QEMU programming error. I really see no reason why
would a combination of a sane

Re: [PATCH 2/4] vhost-user: Interface for migration state transfer

2023-05-09 Thread Stefan Hajnoczi

On Tue, 9 May 2023 at 11:35, Eugenio Perez Martin  wrote:
>
> On Tue, May 9, 2023 at 5:09 PM Stefan Hajnoczi  wrote:
> >
> > On Tue, May 09, 2023 at 08:45:33AM +0200, Eugenio Perez Martin wrote:
> > > On Mon, May 8, 2023 at 10:10 PM Stefan Hajnoczi  
> > > wrote:
> > > >
> > > > On Thu, Apr 20, 2023 at 03:29:44PM +0200, Eugenio Pérez wrote:
> > > > > On Wed, 2023-04-19 at 07:21 -0400, Stefan Hajnoczi wrote:
> > > > > > On Wed, 19 Apr 2023 at 07:10, Hanna Czenczek  
> > > > > > wrote:
> > > > > > > On 18.04.23 09:54, Eugenio Perez Martin wrote:
> > > > > > > > On Mon, Apr 17, 2023 at 9:21 PM Stefan Hajnoczi 
> > > > > > > > 
> > > > > > > > wrote:
> > > > > > > > > On Mon, 17 Apr 2023 at 15:08, Eugenio Perez Martin 
> > > > > > > > > 
> > > > > > > > > wrote:
> > > > > > > > > > On Mon, Apr 17, 2023 at 7:14 PM Stefan Hajnoczi 
> > > > > > > > > > 
> > > > > > > > > > wrote:
> > > > > > > > > > > On Thu, Apr 13, 2023 at 12:14:24PM +0200, Eugenio Perez 
> > > > > > > > > > > Martin
> > > > > > > > > > > wrote:
> > > > > > > > > > > > On Wed, Apr 12, 2023 at 11:06 PM Stefan Hajnoczi <
> > > > > > > > > > > > stefa...@redhat.com> wrote:
> > > > > > > > > > > > > On Tue, Apr 11, 2023 at 05:05:13PM +0200, Hanna 
> > > > > > > > > > > > > Czenczek wrote:
> > > > > > > > > > > > > > So-called "internal" virtio-fs migration refers to
> > > > > > > > > > > > > > transporting the
> > > > > > > > > > > > > > back-end's (virtiofsd's) state through qemu's 
> > > > > > > > > > > > > > migration
> > > > > > > > > > > > > > stream.  To do
> > > > > > > > > > > > > > this, we need to be able to transfer virtiofsd's 
> > > > > > > > > > > > > > internal
> > > > > > > > > > > > > > state to and
> > > > > > > > > > > > > > from virtiofsd.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Because virtiofsd's internal state will not be too 
> > > > > > > > > > > > > > large, we
> > > > > > > > > > > > > > believe it
> > > > > > > > > > > > > > is best to transfer it as a single binary blob 
> > > > > > > > > > > > > > after the
> > > > > > > > > > > > > > streaming
> > > > > > > > > > > > > > phase.  Because this method should be useful to 
> > > > > > > > > > > > > > other vhost-
> > > > > > > > > > > > > > user
> > > > > > > > > > > > > > implementations, too, it is introduced as a 
> > > > > > > > > > > > > > general-purpose
> > > > > > > > > > > > > > addition to
> > > > > > > > > > > > > > the protocol, not limited to vhost-user-fs.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > These are the additions to the protocol:
> > > > > > > > > > > > > > - New vhost-user protocol feature
> > > > > > > > > > > > > > VHOST_USER_PROTOCOL_F_MIGRATORY_STATE:
> > > > > > > > > > > > > >This feature signals support for transferring 
> > > > > > > > > > > > > > state, and is
> > > > > > > > > > > > > > added so
> > > > > > > > > > > > > >that migration can fail early when the back-end 
> > > > > > > > > > > > > > has no
> > > > > > > > > > > > > > support.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > - SET_DEVICE_STATE_FD function: Front-end and 
> > > > > > > > > > > > > > back-end
> > > > > > > > > > > > > > negotiate a pipe
> > > > > > > > > > > > > >over which to transfer the state.  The front-end 
> > > > > > > > > > > > > > sends an
> > > > > > > > > > > > > > FD to the
> > > > > > > > > > > > > >back-end into/from which it can write/read its 
> > > > > > > > > > > > > > state, and
> > > > > > > > > > > > > > the back-end
> > > > > > > > > > > > > >can decide to either use it, or reply with a 
> > > > > > > > > > > > > > different FD
> > > > > > > > > > > > > > for the
> > > > > > > > > > > > > >front-end to override the front-end's choice.
> > > > > > > > > > > > > >The front-end creates a simple pipe to transfer 
> > > > > > > > > > > > > > the state,
> > > > > > > > > > > > > > but maybe
> > > > > > > > > > > > > >the back-end already has an FD into/from which 
> > > > > > > > > > > > > > it has to
> > > > > > > > > > > > > > write/read
> > > > > > > > > > > > > >its state, in which case it will want to 
> > > > > > > > > > > > > > override the
> > > > > > > > > > > > > > simple pipe.
> > > > > > > > > > > > > >Conversely, maybe in the future we find a way to 
> > > > > > > > > > > > > > have the
> > > > > > > > > > > > > > front-end
> > > > > > > > > > > > > >get an immediate FD for the migration stream (in 
> > > > > > > > > > > > > > some
> > > > > > > > > > > > > > cases), in which
> > > > > > > > > > > > > >case we will want to send this to the back-end 
> > > > > > > > > > > > > > instead of
> > > > > > > > > > > > > > creating a
> > > > > > > > > > > > > >pipe.
> > > > > > > > > > > > > >Hence the negotiation: If one side has a better 
> > > > > > > > > > > > > > idea than a
> > > > > > > > > > > > > > plain
> > > > > > > > > > > > > >pipe, we will want to use that.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > -

Re: [PULL 00/17] QAPI patches patches for 2023-05-09

2023-05-09 Thread Paolo Bonzini


On 5/9/23 18:20, Richard Henderson wrote:


This has a number of CI failures, including

https://gitlab.com/qemu-project/qemu/-/jobs/4252925621#L4673
../docs/meson.build:27:6: ERROR: Problem encountered: Install a Python 3 
version of python-sphinx and the readthedoc theme




No Python 3.8 yet (needed for assignment expressions aka the walrus 
operator).  The plan was to require that either as soon as Debian 11 
comes out, which would be in a couple months right before 8.1 freeze, or 
for 8.2.


Paolo

Re: [PATCH] block: compile out assert_bdrv_graph_readable() by default

2023-05-09 Thread Kevin Wolf

Am 01.05.2023 um 19:34 hat Stefan Hajnoczi geschrieben:
> reader_count() is a performance bottleneck because the global
> aio_context_list_lock mutex causes thread contention. Put this debugging
> assertion behind a new ./configure --enable-debug-graph-lock option and
> disable it by default.
> 
> The --enable-debug-graph-lock option is also enabled by the more general
> --enable-debug option.
> 
> Signed-off-by: Stefan Hajnoczi 

Thanks, applied to the block branch.

Kevin

RE: [PATCH v2] Hexagon (decode): look for pkts with multiple insns at the same slot

2023-05-09 Thread Taylor Simpson




> -Original Message-
> From: Taylor Simpson
> Sent: Tuesday, May 9, 2023 11:46 AM
> To: Matheus Tavares Bernardino ; qemu-
> de...@nongnu.org
> Subject: RE: [PATCH v2] Hexagon (decode): look for pkts with multiple insns
> at the same slot
> 
> 
> 
> > -Original Message-
> > From: Matheus Tavares Bernardino 
> > Sent: Monday, May 8, 2023 8:37 AM
> > To: qemu-devel@nongnu.org
> > Cc: Taylor Simpson 
> > Subject: [PATCH v2] Hexagon (decode): look for pkts with multiple insns at
> > the same slot
> >
> > Each slot in a packet can be assigned to at most one instruction.
> > Although the assembler generally ought to enforce this rule, we better be
> > safe than sorry and also do some check to properly throw an "invalid
> packet"
> > exception on wrong slot assignments.
> >
> > This should also make it easier to debug possible future errors caused by
> > missing updates to `find_iclass_slots()` rules in target/hexagon/iclass.c.
> >
> > Co-authored-by: Taylor Simpson 
> > Signed-off-by: Taylor Simpson 
> > Signed-off-by: Matheus Tavares Bernardino
> 
> > ---
> > Changes in v2:
> > - Only call decode_set_slot_number() with !disas_only, fixing the -d
> >   in_asm case.
> >
> > v1: https://lore.kernel.org/qemu-
> >
> devel/7a90f0925f182e56cf49ec3ec01484739fa2f174.1683226473.git.quic_mat
> > hb...@quicinc.com/
> >
> >  target/hexagon/decode.c   | 30 +++---
> >  tests/tcg/hexagon/invalid-slots.c | 29
> +
> > tests/tcg/hexagon/Makefile.target | 11 +++
> >  3 files changed, 67 insertions(+), 3 deletions(-)  create mode 100644
> > tests/tcg/hexagon/invalid-slots.c
> >
> > diff --git a/tests/tcg/hexagon/Makefile.target
> > b/tests/tcg/hexagon/Makefile.target
> > index 7c94db4bc4..0c69216c6c 100644
> > --- a/tests/tcg/hexagon/Makefile.target
> > +++ b/tests/tcg/hexagon/Makefile.target
> > @@ -49,6 +49,17 @@ HEX_TESTS += vector_add_int  HEX_TESTS +=
> > scatter_gather  HEX_TESTS += hvx_misc  HEX_TESTS += hvx_histogram
> > +HEX_TESTS += invalid-slots
> > +
> > +run-and-check-exception = $(call run-test,$2,$3 2>$2.stderr; \
> > +   test $$? -eq 1 && grep -q "exception $(strip $1)" $2.stderr)
> > +
> > +run-invalid-slots: invalid-slots
> > +   $(call run-and-check-exception, 0x15, $@, $(QEMU) $(QEMU_OPTS)
> > $<)
> > +
> > +run-plugin-invalid-slots-with-%: invalid-slots
> > +   $(call run-and-check-exception, 0x15, $@, $(QEMU) $(QEMU_OPTS)
> > \
> > +   -plugin $(PLUGIN_LIB)/$(call extract-plugin,$@) $(call
> > +strip-plugin,$<))
> 
> This isn't invoked and is missing some pieces.  I'll remove it from the PR.
> 
> Otherwise,
> Reviewed-by: Taylor Simpson 

Also
Tested-by: Taylor Simpson

Re: [PATCH v2 00/20] Graph locking, part 3 (more block drivers)

2023-05-09 Thread Kevin Wolf

Am 04.05.2023 um 13:57 hat Kevin Wolf geschrieben:
> The first few patches in this series fix coroutine correctness problems
> that have existed for a while, but only actually start to make things
> fail in practice with stricter checks that we're going to introduce with
> the graph locking work.
> 
> The rest of the series makes sure that the graph lock is held in more
> block driver functions that access the children or parents list of a
> block node. This is incremental work and more patches are yet to come.
> 
> v2:
> - Rebased on current git master
> - Improved some commit messages
> - Patch 5: Added missing coroutine_fn annotations in test
> - Patch 7: Updated comments, too

Thanks for the review, applied to the block branch.

Kevin

[PATCH] migration: Fix duplicated included in meson.build

2023-05-09 Thread Juan Quintela

This is the commint with the merge error (not in the submited patch).

commit 52623f23b0d114837a0d6278180b3e3ae8947117
Author: Lukas Straub 
Date:   Thu Apr 20 11:48:35 2023 +0200

ram-compress.c: Make target independent

Make ram-compress.c target independent.

Fixes: 52623f23b0d114837a0d6278180b3e3ae8947117
Signed-off-by: Juan Quintela 
---
 migration/meson.build | 1 -
 1 file changed, 1 deletion(-)

diff --git a/migration/meson.build b/migration/meson.build
index b81ccaec13..6b5c5a02f5 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -23,7 +23,6 @@ softmmu_ss.add(files(
   'migration.c',
   'multifd.c',
   'multifd-zlib.c',
-  'multifd-zlib.c',
   'ram-compress.c',
   'options.c',
   'postcopy-ram.c',
-- 
2.40.0

Re: ssl fips self check fails with 7.2.0 on x86 TCG

2023-05-09 Thread Michael Tokarev


09.05.2023 17:39, Philippe Mathieu-Daudé пишет:
..> Should be fixed in v7.2-stable:


$ git log --oneline --grep=1d0b9261 v7.2.2
c45d10f655 target/i386: fix ADOX followed by ADCX
6809dbc5c5 target/i386: Fix C flag for BLSI, BLSMSK, BLSR
8d3c9fc439 target/i386: Fix BEXTR instruction


Unfortunately it is still not released, -
I haven't heard anything from Michael Roth since Apr-22 (when 7.2.2 planned).

/mjt

Re: [PTACH v2 2/6] target/riscv: support the AIA device emulateion with KVM enabled

2023-05-09 Thread Andrew Jones



sed 's/emulateion/emulation' <<<$SUBJECT

and for the whole series

sed 's/PTACH/PATCH/' <<<$SUBJECT

Thanks,
drew

On Fri, May 05, 2023 at 11:39:37AM +, Yong-Xuan Wang wrote:
> Remove M mode AIA devices when using KVM acceleration
> 
> Signed-off-by: Yong-Xuan Wang 
> Reviewed-by: Jim Shu 
> ---
>  hw/riscv/virt.c | 198 +---
>  1 file changed, 104 insertions(+), 94 deletions(-)
> 
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index 4e3efbee16..396025b5a5 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -531,52 +531,53 @@ static void create_fdt_imsic(RISCVVirtState *s, const 
> MemMapEntry *memmap,
>  imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2);
>  imsic_regs = g_new0(uint32_t, socket_count * 4);
>  
> -/* M-level IMSIC node */
> -for (cpu = 0; cpu < ms->smp.cpus; cpu++) {
> -imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
> -imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT);
> -}
> -imsic_max_hart_per_socket = 0;
> -for (socket = 0; socket < socket_count; socket++) {
> -imsic_addr = memmap[VIRT_IMSIC_M].base +
> - socket * VIRT_IMSIC_GROUP_MAX_SIZE;
> -imsic_size = IMSIC_HART_SIZE(0) * s->soc[socket].num_harts;
> -imsic_regs[socket * 4 + 0] = 0;
> -imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr);
> -imsic_regs[socket * 4 + 2] = 0;
> -imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size);
> -if (imsic_max_hart_per_socket < s->soc[socket].num_harts) {
> -imsic_max_hart_per_socket = s->soc[socket].num_harts;
> +if (!kvm_enabled()) {
> +/* M-level IMSIC node */
> +for (cpu = 0; cpu < ms->smp.cpus; cpu++) {
> +imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
> +imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT);
>  }
> +imsic_max_hart_per_socket = 0;
> +for (socket = 0; socket < socket_count; socket++) {
> +imsic_addr = memmap[VIRT_IMSIC_M].base +
> + socket * VIRT_IMSIC_GROUP_MAX_SIZE;
> +imsic_size = IMSIC_HART_SIZE(0) * s->soc[socket].num_harts;
> +imsic_regs[socket * 4 + 0] = 0;
> +imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr);
> +imsic_regs[socket * 4 + 2] = 0;
> +imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size);
> +if (imsic_max_hart_per_socket < s->soc[socket].num_harts) {
> +imsic_max_hart_per_socket = s->soc[socket].num_harts;
> +}
> +}
> +imsic_name = g_strdup_printf("/soc/imsics@%lx",
> +(unsigned long)memmap[VIRT_IMSIC_M].base);
> +qemu_fdt_add_subnode(ms->fdt, imsic_name);
> +qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible",
> +"riscv,imsics");
> +qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells",
> +FDT_IMSIC_INT_CELLS);
> +qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller",
> +NULL, 0);
> +qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller",
> +NULL, 0);
> +qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended",
> +imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
> +qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs,
> +socket_count * sizeof(uint32_t) * 4);
> +qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids",
> +VIRT_IRQCHIP_NUM_MSIS);
> +if (socket_count > 1) {
> +qemu_fdt_setprop_cell(ms->fdt, imsic_name, 
> "riscv,hart-index-bits",
> +imsic_num_bits(imsic_max_hart_per_socket));
> +qemu_fdt_setprop_cell(ms->fdt, imsic_name, 
> "riscv,group-index-bits",
> +imsic_num_bits(socket_count));
> +qemu_fdt_setprop_cell(ms->fdt, imsic_name,
> +"riscv,group-index-shift", IMSIC_MMIO_GROUP_MIN_SHIFT);
> +}
> +qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", 
> *msi_m_phandle);
> +g_free(imsic_name);
>  }
> -imsic_name = g_strdup_printf("/soc/imsics@%lx",
> -(unsigned long)memmap[VIRT_IMSIC_M].base);
> -qemu_fdt_add_subnode(ms->fdt, imsic_name);
> -qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible",
> -"riscv,imsics");
> -qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells",
> -FDT_IMSIC_INT_CELLS);
> -qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller",
> -NULL, 0);
> -qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller",
> -NULL, 0);
> -qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended",
> -imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2);
> -qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs,
> -socket_count * sizeof(uint32_t) * 4);
> -qemu_fdt_setprop_cell(ms->fdt, imsic_name,

RE: [PATCH v2] Hexagon (decode): look for pkts with multiple insns at the same slot

2023-05-09 Thread Taylor Simpson




> -Original Message-
> From: Matheus Tavares Bernardino 
> Sent: Monday, May 8, 2023 8:37 AM
> To: qemu-devel@nongnu.org
> Cc: Taylor Simpson 
> Subject: [PATCH v2] Hexagon (decode): look for pkts with multiple insns at
> the same slot
> 
> Each slot in a packet can be assigned to at most one instruction.
> Although the assembler generally ought to enforce this rule, we better be
> safe than sorry and also do some check to properly throw an "invalid packet"
> exception on wrong slot assignments.
> 
> This should also make it easier to debug possible future errors caused by
> missing updates to `find_iclass_slots()` rules in target/hexagon/iclass.c.
> 
> Co-authored-by: Taylor Simpson 
> Signed-off-by: Taylor Simpson 
> Signed-off-by: Matheus Tavares Bernardino 
> ---
> Changes in v2:
> - Only call decode_set_slot_number() with !disas_only, fixing the -d
>   in_asm case.
> 
> v1: https://lore.kernel.org/qemu-
> devel/7a90f0925f182e56cf49ec3ec01484739fa2f174.1683226473.git.quic_mat
> hb...@quicinc.com/
> 
>  target/hexagon/decode.c   | 30 +++---
>  tests/tcg/hexagon/invalid-slots.c | 29 +
> tests/tcg/hexagon/Makefile.target | 11 +++
>  3 files changed, 67 insertions(+), 3 deletions(-)  create mode 100644
> tests/tcg/hexagon/invalid-slots.c
> 
> diff --git a/tests/tcg/hexagon/Makefile.target
> b/tests/tcg/hexagon/Makefile.target
> index 7c94db4bc4..0c69216c6c 100644
> --- a/tests/tcg/hexagon/Makefile.target
> +++ b/tests/tcg/hexagon/Makefile.target
> @@ -49,6 +49,17 @@ HEX_TESTS += vector_add_int  HEX_TESTS +=
> scatter_gather  HEX_TESTS += hvx_misc  HEX_TESTS += hvx_histogram
> +HEX_TESTS += invalid-slots
> +
> +run-and-check-exception = $(call run-test,$2,$3 2>$2.stderr; \
> + test $$? -eq 1 && grep -q "exception $(strip $1)" $2.stderr)
> +
> +run-invalid-slots: invalid-slots
> + $(call run-and-check-exception, 0x15, $@, $(QEMU) $(QEMU_OPTS)
> $<)
> +
> +run-plugin-invalid-slots-with-%: invalid-slots
> + $(call run-and-check-exception, 0x15, $@, $(QEMU) $(QEMU_OPTS)
> \
> + -plugin $(PLUGIN_LIB)/$(call extract-plugin,$@) $(call
> +strip-plugin,$<))

This isn't invoked and is missing some pieces.  I'll remove it from the PR.

Otherwise,
Reviewed-by: Taylor Simpson

[PATCH v2 1/5] disas: Move disas.c to disas/

2023-05-09 Thread Richard Henderson

Reviewed-by: Thomas Huth 
Signed-off-by: Richard Henderson 
Message-Id: <20230503072331.1747057-80-richard.hender...@linaro.org>
---
 meson.build  | 3 ---
 disas.c => disas/disas.c | 0
 disas/meson.build| 4 +++-
 3 files changed, 3 insertions(+), 4 deletions(-)
 rename disas.c => disas/disas.c (100%)

diff --git a/meson.build b/meson.build
index 229eb585f7..d56358b65f 100644
--- a/meson.build
+++ b/meson.build
@@ -3152,9 +3152,6 @@ specific_ss.add(files('cpu.c'))
 
 subdir('softmmu')
 
-common_ss.add(capstone)
-specific_ss.add(files('disas.c'), capstone)
-
 # Work around a gcc bug/misfeature wherein constant propagation looks
 # through an alias:
 #   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99696
diff --git a/disas.c b/disas/disas.c
similarity index 100%
rename from disas.c
rename to disas/disas.c
diff --git a/disas/meson.build b/disas/meson.build
index c865bdd882..cbf6315f25 100644
--- a/disas/meson.build
+++ b/disas/meson.build
@@ -10,4 +10,6 @@ common_ss.add(when: 'CONFIG_RISCV_DIS', if_true: 
files('riscv.c'))
 common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c'))
 common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c'))
 common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c'))
-common_ss.add(when: capstone, if_true: files('capstone.c'))
+common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone])
+
+specific_ss.add(files('disas.c'), capstone)
-- 
2.34.1

[PATCH v2 3/5] disas: Remove target-specific headers

2023-05-09 Thread Richard Henderson

Reviewed-by: Thomas Huth 
Signed-off-by: Richard Henderson 
Message-Id: <20230503072331.1747057-83-richard.hender...@linaro.org>
---
 include/disas/disas.h | 6 --
 disas/disas.c | 3 ++-
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/include/disas/disas.h b/include/disas/disas.h
index 6c394e0b09..176775eff7 100644
--- a/include/disas/disas.h
+++ b/include/disas/disas.h
@@ -1,11 +1,6 @@
 #ifndef QEMU_DISAS_H
 #define QEMU_DISAS_H
 
-#include "exec/hwaddr.h"
-
-#ifdef NEED_CPU_H
-#include "cpu.h"
-
 /* Disassemble this for me please... (debugging). */
 void disas(FILE *out, const void *code, size_t size);
 void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size);
@@ -17,7 +12,6 @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size);
 
 /* Look up symbol for debugging purpose.  Returns "" if unknown. */
 const char *lookup_symbol(uint64_t orig_addr);
-#endif
 
 struct syminfo;
 struct elf32_sym;
diff --git a/disas/disas.c b/disas/disas.c
index f5e95043cf..a709831e8d 100644
--- a/disas/disas.c
+++ b/disas/disas.c
@@ -3,9 +3,10 @@
 #include "disas/dis-asm.h"
 #include "elf.h"
 #include "qemu/qemu-print.h"
-
 #include "disas/disas.h"
 #include "disas/capstone.h"
+#include "hw/core/cpu.h"
+#include "exec/memory.h"
 
 typedef struct CPUDebug {
 struct disassemble_info info;
-- 
2.34.1

[PATCH v2 5/5] disas: Move disas.c into the target-independent source set

2023-05-09 Thread Richard Henderson

From: Thomas Huth 

By using target_words_bigendian() instead of an ifdef,
we can build this code once.

Signed-off-by: Thomas Huth 
[rth: Type change done in a separate patch]
Signed-off-by: Richard Henderson 
---
 disas/disas.c | 10 +-
 disas/meson.build |  3 ++-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/disas/disas.c b/disas/disas.c
index 5e7401bb6f..954b385a82 100644
--- a/disas/disas.c
+++ b/disas/disas.c
@@ -122,11 +122,11 @@ void disas_initialize_debug_target(CPUDebug *s, CPUState 
*cpu)
 s->cpu = cpu;
 s->info.read_memory_func = target_read_memory;
 s->info.print_address_func = print_address;
-#if TARGET_BIG_ENDIAN
-s->info.endian = BFD_ENDIAN_BIG;
-#else
-s->info.endian = BFD_ENDIAN_LITTLE;
-#endif
+if (target_words_bigendian()) {
+s->info.endian = BFD_ENDIAN_BIG;
+} else {
+s->info.endian =  BFD_ENDIAN_LITTLE;
+}
 
 CPUClass *cc = CPU_GET_CLASS(cpu);
 if (cc->disas_set_info) {
diff --git a/disas/meson.build b/disas/meson.build
index f40230c58f..2ae44691fa 100644
--- a/disas/meson.build
+++ b/disas/meson.build
@@ -13,4 +13,5 @@ common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: 
files('xtensa.c'))
 common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone])
 
 softmmu_ss.add(files('disas-mon.c'))
-specific_ss.add(files('disas.c'), capstone)
+common_ss.add(files('disas.c'), capstone)
+specific_ss.add(capstone)
-- 
2.34.1

[PATCH v2 2/5] disas: Remove target_ulong from the interface

2023-05-09 Thread Richard Henderson

Use uint64_t for the pc, and size_t for the size.

Signed-off-by: Richard Henderson 
Message-Id: <20230503072331.1747057-81-richard.hender...@linaro.org>
---
 include/disas/disas.h | 17 ++---
 bsd-user/elfload.c|  5 +++--
 disas/disas.c | 19 +--
 linux-user/elfload.c  |  5 +++--
 4 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/include/disas/disas.h b/include/disas/disas.h
index d363e95ede..6c394e0b09 100644
--- a/include/disas/disas.h
+++ b/include/disas/disas.h
@@ -7,28 +7,23 @@
 #include "cpu.h"
 
 /* Disassemble this for me please... (debugging). */
-void disas(FILE *out, const void *code, unsigned long size);
-void target_disas(FILE *out, CPUState *cpu, target_ulong code,
-  target_ulong size);
+void disas(FILE *out, const void *code, size_t size);
+void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size);
 
-void monitor_disas(Monitor *mon, CPUState *cpu,
-   target_ulong pc, int nb_insn, int is_physical);
+void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
+   int nb_insn, bool is_physical);
 
 char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size);
 
 /* Look up symbol for debugging purpose.  Returns "" if unknown. */
-const char *lookup_symbol(target_ulong orig_addr);
+const char *lookup_symbol(uint64_t orig_addr);
 #endif
 
 struct syminfo;
 struct elf32_sym;
 struct elf64_sym;
 
-#if defined(CONFIG_USER_ONLY)
-typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_ulong 
orig_addr);
-#else
-typedef const char *(*lookup_symbol_t)(struct syminfo *s, hwaddr orig_addr);
-#endif
+typedef const char *(*lookup_symbol_t)(struct syminfo *s, uint64_t orig_addr);
 
 struct syminfo {
 lookup_symbol_t lookup_symbol;
diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index fbcdc94b96..2e76f0d3b5 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -352,9 +352,10 @@ static abi_ulong load_elf_interp(struct elfhdr 
*interp_elf_ex,
 
 static int symfind(const void *s0, const void *s1)
 {
-target_ulong addr = *(target_ulong *)s0;
+__typeof(sym->st_value) addr = *(uint64_t *)s0;
 struct elf_sym *sym = (struct elf_sym *)s1;
 int result = 0;
+
 if (addr < sym->st_value) {
 result = -1;
 } else if (addr >= sym->st_value + sym->st_size) {
@@ -363,7 +364,7 @@ static int symfind(const void *s0, const void *s1)
 return result;
 }
 
-static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr)
+static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr)
 {
 #if ELF_CLASS == ELFCLASS32
 struct elf_sym *syms = s->disas_symtab.elf32;
diff --git a/disas/disas.c b/disas/disas.c
index b087c12c47..f5e95043cf 100644
--- a/disas/disas.c
+++ b/disas/disas.c
@@ -204,10 +204,9 @@ static void initialize_debug_host(CPUDebug *s)
 }
 
 /* Disassemble this for me please... (debugging).  */
-void target_disas(FILE *out, CPUState *cpu, target_ulong code,
-  target_ulong size)
+void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size)
 {
-target_ulong pc;
+uint64_t pc;
 int count;
 CPUDebug s;
 
@@ -226,7 +225,7 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong 
code,
 }
 
 for (pc = code; size > 0; pc += count, size -= count) {
-   fprintf(out, "0x" TARGET_FMT_lx ":  ", pc);
+   fprintf(out, "0x%08" PRIx64 ":  ", pc);
count = s.info.print_insn(pc, );
fprintf(out, "\n");
if (count < 0)
@@ -292,7 +291,7 @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t 
size)
 }
 
 /* Disassemble this for me please... (debugging). */
-void disas(FILE *out, const void *code, unsigned long size)
+void disas(FILE *out, const void *code, size_t size)
 {
 uintptr_t pc;
 int count;
@@ -324,7 +323,7 @@ void disas(FILE *out, const void *code, unsigned long size)
 }
 
 /* Look up symbol for debugging purpose.  Returns "" if unknown. */
-const char *lookup_symbol(target_ulong orig_addr)
+const char *lookup_symbol(uint64_t orig_addr)
 {
 const char *symbol = "";
 struct syminfo *s;
@@ -356,8 +355,8 @@ physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int 
length,
 }
 
 /* Disassembler for the monitor.  */
-void monitor_disas(Monitor *mon, CPUState *cpu,
-   target_ulong pc, int nb_insn, int is_physical)
+void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
+   int nb_insn, bool is_physical)
 {
 int count, i;
 CPUDebug s;
@@ -378,13 +377,13 @@ void monitor_disas(Monitor *mon, CPUState *cpu,
 }
 
 if (!s.info.print_insn) {
-monitor_printf(mon, "0x" TARGET_FMT_lx
+monitor_printf(mon, "0x%08" PRIx64
": Asm output not supported on this arch\n", pc);
 return;
 }
 
 for (i = 0; i < nb_insn; i++) {
-g_string_append_printf(ds, "0x" TARGET_FMT_lx ":  ", pc);
+g_string_append_printf(ds, "0x%08" PRIx64 ":

[PATCH v2 0/5] Make the core disassembler functions target-independent

2023-05-09 Thread Richard Henderson

Merges Thomas' RFC patch set with part of my "build-tcg-once" patch set.
The only real change from Thomas' is to use uint64_t instead of hwaddr.


r~


Richard Henderson (3):
  disas: Move disas.c to disas/
  disas: Remove target_ulong from the interface
  disas: Remove target-specific headers

Thomas Huth (2):
  disas: Move softmmu specific code to separate file
  disas: Move disas.c into the target-independent source set

 meson.build  |   3 --
 disas/disas-internal.h   |  21 
 include/disas/disas.h|  23 +++--
 bsd-user/elfload.c   |   5 +-
 disas/disas-mon.c|  65 +
 disas.c => disas/disas.c | 100 +++
 linux-user/elfload.c |   5 +-
 disas/meson.build|   6 ++-
 8 files changed, 121 insertions(+), 107 deletions(-)
 create mode 100644 disas/disas-internal.h
 create mode 100644 disas/disas-mon.c
 rename disas.c => disas/disas.c (79%)

-- 
2.34.1

[PATCH v2 4/5] disas: Move softmmu specific code to separate file

2023-05-09 Thread Richard Henderson

From: Thomas Huth 

We'd like to move disas.c into the common code source set, where
CONFIG_USER_ONLY is not available anymore. So we have to move
the related code into a separate file instead.

Signed-off-by: Thomas Huth 
[rth: Type change done in a separate patch]
Signed-off-by: Richard Henderson 
---
 disas/disas-internal.h | 21 
 disas/disas-mon.c  | 65 
 disas/disas.c  | 76 --
 disas/meson.build  |  1 +
 4 files changed, 93 insertions(+), 70 deletions(-)
 create mode 100644 disas/disas-internal.h
 create mode 100644 disas/disas-mon.c

diff --git a/disas/disas-internal.h b/disas/disas-internal.h
new file mode 100644
index 00..84a01f126f
--- /dev/null
+++ b/disas/disas-internal.h
@@ -0,0 +1,21 @@
+/*
+ * Definitions used internally in the disassembly code
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef DISAS_INTERNAL_H
+#define DISAS_INTERNAL_H
+
+#include "disas/dis-asm.h"
+
+typedef struct CPUDebug {
+struct disassemble_info info;
+CPUState *cpu;
+} CPUDebug;
+
+void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu);
+int disas_gstring_printf(FILE *stream, const char *fmt, ...)
+G_GNUC_PRINTF(2, 3);
+
+#endif
diff --git a/disas/disas-mon.c b/disas/disas-mon.c
new file mode 100644
index 00..48ac492c6c
--- /dev/null
+++ b/disas/disas-mon.c
@@ -0,0 +1,65 @@
+/*
+ * Functions related to disassembly from the monitor
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "disas-internal.h"
+#include "disas/disas.h"
+#include "exec/memory.h"
+#include "hw/core/cpu.h"
+#include "monitor/monitor.h"
+
+static int
+physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
+ struct disassemble_info *info)
+{
+CPUDebug *s = container_of(info, CPUDebug, info);
+MemTxResult res;
+
+res = address_space_read(s->cpu->as, memaddr, MEMTXATTRS_UNSPECIFIED,
+ myaddr, length);
+return res == MEMTX_OK ? 0 : EIO;
+}
+
+/* Disassembler for the monitor.  */
+void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
+   int nb_insn, bool is_physical)
+{
+int count, i;
+CPUDebug s;
+g_autoptr(GString) ds = g_string_new("");
+
+disas_initialize_debug_target(, cpu);
+s.info.fprintf_func = disas_gstring_printf;
+s.info.stream = (FILE *)ds;  /* abuse this slot */
+
+if (is_physical) {
+s.info.read_memory_func = physical_read_memory;
+}
+s.info.buffer_vma = pc;
+
+if (s.info.cap_arch >= 0 && cap_disas_monitor(, pc, nb_insn)) {
+monitor_puts(mon, ds->str);
+return;
+}
+
+if (!s.info.print_insn) {
+monitor_printf(mon, "0x%08" PRIx64
+   ": Asm output not supported on this arch\n", pc);
+return;
+}
+
+for (i = 0; i < nb_insn; i++) {
+g_string_append_printf(ds, "0x%08" PRIx64 ":  ", pc);
+count = s.info.print_insn(pc, );
+g_string_append_c(ds, '\n');
+if (count < 0) {
+break;
+}
+pc += count;
+}
+
+monitor_puts(mon, ds->str);
+}
diff --git a/disas/disas.c b/disas/disas.c
index a709831e8d..5e7401bb6f 100644
--- a/disas/disas.c
+++ b/disas/disas.c
@@ -1,6 +1,6 @@
 /* General "disassemble this chunk" code.  Used for debugging. */
 #include "qemu/osdep.h"
-#include "disas/dis-asm.h"
+#include "disas/disas-internal.h"
 #include "elf.h"
 #include "qemu/qemu-print.h"
 #include "disas/disas.h"
@@ -8,11 +8,6 @@
 #include "hw/core/cpu.h"
 #include "exec/memory.h"
 
-typedef struct CPUDebug {
-struct disassemble_info info;
-CPUState *cpu;
-} CPUDebug;
-
 /* Filled in by elfload.c.  Simplistic, but will do for now. */
 struct syminfo *syminfos = NULL;
 
@@ -120,7 +115,7 @@ static void initialize_debug(CPUDebug *s)
 s->info.symbol_at_address_func = symbol_at_address;
 }
 
-static void initialize_debug_target(CPUDebug *s, CPUState *cpu)
+void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu)
 {
 initialize_debug(s);
 
@@ -211,7 +206,7 @@ void target_disas(FILE *out, CPUState *cpu, uint64_t code, 
size_t size)
 int count;
 CPUDebug s;
 
-initialize_debug_target(, cpu);
+disas_initialize_debug_target(, cpu);
 s.info.fprintf_func = fprintf;
 s.info.stream = out;
 s.info.buffer_vma = code;
@@ -241,8 +236,7 @@ void target_disas(FILE *out, CPUState *cpu, uint64_t code, 
size_t size)
 }
 }
 
-static int G_GNUC_PRINTF(2, 3)
-gstring_printf(FILE *stream, const char *fmt, ...)
+int disas_gstring_printf(FILE *stream, const char *fmt, ...)
 {
 /* We abuse the FILE parameter to pass a GString. */
 GString *s = (GString *)stream;
@@ -272,8 +266,8 @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t 
size)
 CPUDebug s;
 GString *ds = g_string_new(NULL);
 
-initialize_debug_target(, cpu);
-s.info.fprintf_func =

[PATCH v1 1/1] s390x/pv: Fix spurious warning with asynchronous teardown

2023-05-09 Thread Claudio Imbrenda

When rebooting a small VM using asynchronous teardown, a spurious
warning is emitted when the KVM_PV_ASYNC_CLEANUP_PREPARE ioctl fails.

Avoid using asynchronous teardown altogether when the VM is small
enough; the cutoff is set at 4GiB. This will avoid triggering the
warning and also avoid pointless overhead; normal teardown is fast
enough for small VMs.

Reported-by: Marc Hartmayer 
Fixes: c3a073c610 ("s390x/pv: Add support for asynchronous teardown for reboot")
Signed-off-by: Claudio Imbrenda 
---
 hw/s390x/pv.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c
index 49ea38236c..17c5556319 100644
--- a/hw/s390x/pv.c
+++ b/hw/s390x/pv.c
@@ -13,6 +13,7 @@
 
 #include 
 
+#include "qemu/units.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "sysemu/kvm.h"
@@ -117,13 +118,16 @@ static void *s390_pv_do_unprot_async_fn(void *p)
 
 bool s390_pv_vm_try_disable_async(void)
 {
+MachineState *machine = MACHINE(qdev_get_machine());
 /*
  * t is only needed to create the thread; once qemu_thread_create
  * returns, it can safely be discarded.
  */
 QemuThread t;
 
-if (!kvm_check_extension(kvm_state, KVM_CAP_S390_PROTECTED_ASYNC_DISABLE)) 
{
+/* Avoid the overhead of asynchronous teardown for small machines */
+if ((machine->maxram_size < 4 * GiB) ||
+!kvm_check_extension(kvm_state, KVM_CAP_S390_PROTECTED_ASYNC_DISABLE)) 
{
 return false;
 }
 if (s390_pv_cmd(KVM_PV_ASYNC_CLEANUP_PREPARE, NULL) != 0) {
-- 
2.40.1

Re: [PATCH v2] iotests: Test resizing image attached to an iothread

2023-05-09 Thread Eric Blake

On Tue, May 09, 2023 at 03:41:33PM +0200, Kevin Wolf wrote:
> This tests that trying to resize an image with QMP block_resize doesn't
> hang or otherwise fail when the image is attached to a device running in
> an iothread.
> 
> This is a regression test for the recent fix that changed
> qmp_block_resize, which is a coroutine based QMP handler, to avoid
> calling no_coroutine_fns directly.
> 
> Signed-off-by: Kevin Wolf 
> ---
> v2:
> - Not all formats/protocols support resizing, restrict the list
> - Filter _img_info output, it varies between formats

Shoot, I read my email out-of-order today and gave R-b on v1 before
even seeing that you had already posted v2.

These changes make sense, and make the test better.

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [PULL 00/17] QAPI patches patches for 2023-05-09

2023-05-09 Thread Richard Henderson


On 5/9/23 08:59, Markus Armbruster wrote:

The following changes since commit 792f77f376adef944f9a03e601f6ad90c2f891b2:

   Merge tag 'pull-loongarch-20230506' of https://gitlab.com/gaosong/qemu into 
staging (2023-05-06 08:11:52 +0100)

are available in the Git repository at:

   https://repo.or.cz/qemu/armbru.git tags/pull-qapi-2023-05-09

for you to fetch changes up to ddd37ae995acacfd858d2ee090c3fa61e33b986b:

   qapi: Reformat doc comments to conform to current conventions (2023-05-09 
09:57:15 +0200)


QAPI patches patches for 2023-05-09


Markus Armbruster (17):
   docs/devel/qapi-code-gen: Clean up use of quotes a bit
   docs/devel/qapi-code-gen: Turn FIXME admonitions into comments
   qapi: Fix crash on stray double quote character
   meson: Fix to make QAPI generator output depend on main.py
   Revert "qapi: BlockExportRemoveMode: move comments to TODO"
   sphinx/qapidoc: Do not emit TODO sections into user manuals
   qapi: Tidy up a slightly awkward TODO comment
   qapi/dump: Indent bulleted lists consistently
   tests/qapi-schema/doc-good: Improve a comment
   tests/qapi-schema/doc-good: Improve argument description tests
   qapi: Fix argument description indentation stripping
   qapi: Rewrite parsing of doc comment section symbols and tags
   qapi: Relax doc string @name: description indentation rules
   qapi: Section parameter @indent is no longer used, drop
   docs/devel/qapi-code-gen: Update doc comment conventions
   qga/qapi-schema: Reformat doc comments to conform to current conventions
   qapi: Reformat doc comments to conform to current conventions


This has a number of CI failures, including

https://gitlab.com/qemu-project/qemu/-/jobs/4252925621#L4673
../docs/meson.build:27:6: ERROR: Problem encountered: Install a Python 3 version of 
python-sphinx and the readthedoc theme



r~

Re: [PATCH v2 0/2] tests/lcitool: Add mtools and xorriso and remove genisoimage as dependencies

2023-05-09 Thread Michael S. Tsirkin

On Thu, May 04, 2023 at 09:16:09PM +0530, Ani Sinha wrote:
> mformat and xorriso tools are needed by biosbits avocado tests. This patchset
> adds those two tools in the docker container images.
> xorriso package conflicts with genisoimage package on some distributions.
> Therefore, it is not possible to have both the packages at the same time
> in the container image uniformly for all distribution flavors. Further,
> on some distributions like RHEL, both xorriso and genisoimage
> packages provide /usr/bin/genisoimage and on some other distributions like
> Fedora, only genisoimage package provides the same utility.
> Therefore, this change removes the dependency on geninsoimage for building
> container images altogether keeping only xorriso package. At the same time,
> cdrom-test.c is updated to use and check for existence of only xorrisofs.
> 
> Patch 1 pulls in the latest changes in lcitool in order to add mappings
> for these packages in various distros.
> Patch 2 updates all Dockerfiles in QEMU repository to add these two
> tools. It also removed genisoimage package and updated cdrom-test to not
> use genisoimage but xorrisofs.
> 
> CC: m...@redhat.com
> CC: berra...@redhat.com
> CC: alex.ben...@linaro.org
> CC: phi...@linaro.org
> CC: waine...@redhat.com
> CC: bl...@redhat.com
> CC: th...@redhat.com
> TO: qemu-devel@nongnu.org
> 
> Changelog:
> v2: remove genisoimage package and update Dockerfile. Also update cdrom-test.c
> so that it uses xorrisofs and not genisoimage. I have tested patch #2 on both
> Fedora 37 and RHEL 9.1. cdrom-test passed on both.


Reviewed-by: Michael S. Tsirkin 

who's merging this?

> Ani Sinha (2):
>   tests: libvirt-ci: Update to commit 'c8971e90ac' to pull in mformat
> and xorriso
>   tests/lcitool: Add mtools and xorriso and remove genisoimage as
> dependencies
> 
>  .gitlab-ci.d/cirrus/freebsd-13.vars|  2 +-
>  .gitlab-ci.d/cirrus/macos-12.vars  |  2 +-
>  tests/docker/dockerfiles/alpine.docker |  3 ++-
>  tests/docker/dockerfiles/centos8.docker|  3 ++-
>  tests/docker/dockerfiles/debian-amd64-cross.docker |  3 ++-
>  tests/docker/dockerfiles/debian-amd64.docker   |  3 ++-
>  tests/docker/dockerfiles/debian-arm64-cross.docker |  3 ++-
>  tests/docker/dockerfiles/debian-armel-cross.docker |  3 ++-
>  tests/docker/dockerfiles/debian-armhf-cross.docker |  3 ++-
>  .../dockerfiles/debian-mips64el-cross.docker   |  3 ++-
>  .../docker/dockerfiles/debian-mipsel-cross.docker  |  3 ++-
>  .../docker/dockerfiles/debian-ppc64el-cross.docker |  3 ++-
>  tests/docker/dockerfiles/debian-s390x-cross.docker |  3 ++-
>  tests/docker/dockerfiles/fedora-win32-cross.docker |  3 ++-
>  tests/docker/dockerfiles/fedora-win64-cross.docker |  3 ++-
>  tests/docker/dockerfiles/fedora.docker |  3 ++-
>  tests/docker/dockerfiles/opensuse-leap.docker  |  3 ++-
>  tests/docker/dockerfiles/ubuntu2004.docker |  3 ++-
>  tests/docker/dockerfiles/ubuntu2204.docker |  3 ++-
>  tests/lcitool/libvirt-ci   |  2 +-
>  tests/lcitool/projects/qemu.yml|  3 ++-
>  tests/qtest/cdrom-test.c   | 14 +++---
>  22 files changed, 46 insertions(+), 28 deletions(-)
> 
> -- 
> 2.31.1

Re: [PATCH] iotests: Test resizing image attached to an iothread

2023-05-09 Thread Eric Blake

On Tue, May 09, 2023 at 12:59:31PM +0200, Kevin Wolf wrote:
> This tests that trying to resize an image with QMP block_resize doesn't
> hang or otherwise fail when the image is attached to a device running in
> an iothread.
> 
> This is a regression test for the recent fix that changed
> qmp_block_resize, which is a coroutine based QMP handler, to avoid
> calling no_coroutine_fns directly.
> 
> Signed-off-by: Kevin Wolf 
> ---
>  tests/qemu-iotests/tests/iothreads-resize | 70 +++
>  tests/qemu-iotests/tests/iothreads-resize.out | 12 
>  2 files changed, 82 insertions(+)
>  create mode 100755 tests/qemu-iotests/tests/iothreads-resize
>  create mode 100644 tests/qemu-iotests/tests/iothreads-resize.out
>

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [PATCH 06/11] test-cutils: Add more coverage to qemu_strtosz

2023-05-09 Thread Eric Blake

On Mon, May 08, 2023 at 03:03:38PM -0500, Eric Blake wrote:
> Add some more strings that the user might send our way.  In
> particular, some of these additions include FIXME comments showing
> where our parser doesn't quite behave the way we want.
> 
> Signed-off-by: Eric Blake 
> ---
>  tests/unit/test-cutils.c | 226 +--
>  1 file changed, 215 insertions(+), 11 deletions(-)
> 

> @@ -2704,13 +2749,30 @@ static void test_qemu_strtosz_invalid(void)
> 
>  str = " \t ";
>  endptr = NULL;
> +res = 0xbaadf00d;
>  err = qemu_strtosz(str, , );
>  g_assert_cmpint(err, ==, -EINVAL);
>  g_assert_cmphex(res, ==, 0xbaadf00d);
>  g_assert_true(endptr == str);
> 
> +str = ".";
> +endptr = NULL;
> +res = 0xbaadf00d;
> +err = qemu_strtosz(str, , );
> +g_assert_cmpint(err, ==, -EINVAL);
> +g_assert_cmphex(res, ==, 0xbaadf00d);
> +g_assert(endptr == str);

Rebase botch.  I should be using g_assert_true() here in line with
earlier in the series.  I think I cleaned it up later in the series,
but shouldn't be churning on it that badly.  Looks like I get to send
a v2 to fix this and other things; I'll wait another day for other
reviews first.  (That's what I get for rearranging patches after the
fact for a nicer presentation order...)

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [PATCH v2 3/3] pci: ROM preallocation for incoming migration

2023-05-09 Thread David Hildenbrand


On 09.05.23 17:54, Michael S. Tsirkin wrote:

On Wed, May 03, 2023 at 02:39:15PM +0300, Vladimir Sementsov-Ogievskiy wrote:

On 03.05.23 13:05, Michael S. Tsirkin wrote:

On Wed, May 03, 2023 at 12:50:09PM +0300, Vladimir Sementsov-Ogievskiy wrote:

On 03.05.23 12:20, David Hildenbrand wrote:

On 25.04.23 18:14, Vladimir Sementsov-Ogievskiy wrote:

On incoming migration we have the following sequence to load option
ROM:

1. On device realize we do normal load ROM from the file

2. Than, on incoming migration we rewrite ROM from the incoming RAM
      block. If sizes mismatch we fail.

This is not ideal when we migrate to updated distribution: we have to
keep old ROM files in new distribution and be careful around romfile
property to load correct ROM file. Which is loaded actually just to
allocate the ROM with correct length.

Note, that romsize property doesn't really help: if we try to specify
it when default romfile is larger, it fails with something like:

romfile "efi-virtio.rom" (160768 bytes) is too large for ROM size 65536

Let's just ignore ROM file when romsize is specified and we are in
incoming migration state. In other words, we need only to preallocate
ROM of specified size, local ROM file is unrelated.

This way:

If romsize was specified on source, we just use same commandline as on
source, and migration will work independently of local ROM files on
target.

If romsize was not specified on source (and we have mismatching local
ROM file on target host), we have to specify romsize on target to match
source romsize. romfile parameter may be kept same as on source or may
be dropped, the file is not loaded anyway.

As a bonus we avoid extra reading from ROM file on target.

Note: when we don't have romsize parameter on source command line and
need it for target, it may be calculated as aligned up to power of two
size of ROM file on source (if we know, which file is it) or,
alternatively it may be retrieved from source QEMU by QMP qom-get
command, like

     { "execute": "qom-get",
   "arguments": {
     "path": "/machine/peripheral/CARD_ID/virtio-net-pci.rom[0]",
     "property": "size" } }

Suggested-by: Michael S. Tsirkin 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
    hw/pci/pci.c | 77 ++--
    1 file changed, 45 insertions(+), 32 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index a442f8fce1..e2cab622e4 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -36,6 +36,7 @@
    #include "migration/vmstate.h"
    #include "net/net.h"
    #include "sysemu/numa.h"
+#include "sysemu/runstate.h"
    #include "sysemu/sysemu.h"
    #include "hw/loader.h"
    #include "qemu/error-report.h"
@@ -2293,10 +2294,16 @@ static void pci_add_option_rom(PCIDevice *pdev, bool 
is_default_rom,
    {
    int64_t size;
    g_autofree char *path = NULL;
-    void *ptr;
    char name[32];
    const VMStateDescription *vmsd;
+    /*
+ * In case of incoming migration ROM will come with migration stream, no
+ * reason to load the file.  Neither we want to fail if local ROM file
+ * mismatches with specified romsize.
+ */
+    bool load_file = !runstate_check(RUN_STATE_INMIGRATE);
+
    if (!pdev->romfile) {
    return;
    }
@@ -2329,32 +2336,35 @@ static void pci_add_option_rom(PCIDevice *pdev, bool 
is_default_rom,
    return;
    }
-    path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
-    if (path == NULL) {
-    path = g_strdup(pdev->romfile);
-    }
+    if (load_file || pdev->romsize == -1) {
+    path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
+    if (path == NULL) {
+    path = g_strdup(pdev->romfile);
+    }
-    size = get_image_size(path);
-    if (size < 0) {
-    error_setg(errp, "failed to find romfile \"%s\"", pdev->romfile);
-    return;
-    } else if (size == 0) {
-    error_setg(errp, "romfile \"%s\" is empty", pdev->romfile);
-    return;
-    } else if (size > 2 * GiB) {
-    error_setg(errp, "romfile \"%s\" too large (size cannot exceed 2 GiB)",
-   pdev->romfile);
-    return;
-    }
-    if (pdev->romsize != -1) {
-    if (size > pdev->romsize) {
-    error_setg(errp, "romfile \"%s\" (%u bytes) "
-   "is too large for ROM size %u",
-   pdev->romfile, (uint32_t)size, pdev->romsize);
+    size = get_image_size(path);
+    if (size < 0) {
+    error_setg(errp, "failed to find romfile \"%s\"", pdev->romfile);
+    return;
+    } else if (size == 0) {
+    error_setg(errp, "romfile \"%s\" is empty", pdev->romfile);
+    return;
+    } else if (size > 2 * GiB) {
+    error_setg(errp,
+   "romfile \"%s\" too large (size cannot exceed 2 GiB)",
+   pdev->romfile);
    return;
    }
-    } else {
-    pdev->romsize =

Re: [PATCH 06/11] test-cutils: Add more coverage to qemu_strtosz

2023-05-09 Thread Eric Blake

On Tue, May 09, 2023 at 02:31:08PM +0200, Hanna Czenczek wrote:
> On 08.05.23 22:03, Eric Blake wrote:
> > Add some more strings that the user might send our way.  In
> > particular, some of these additions include FIXME comments showing
> > where our parser doesn't quite behave the way we want.
> > 
> > Signed-off-by: Eric Blake 
> > ---
> >   tests/unit/test-cutils.c | 226 +--
> >   1 file changed, 215 insertions(+), 11 deletions(-)
> 
> I wonder: The plan is to have "1.5e+1k" be parsed as "1.5e" + endptr ==
> "+1k"; but "0x1p1" is not parsed at all (could be "0x1" + "p1"). Is that
> fully intentional?

Pre-series: "1.5e+1k" is parsed as "1" ".5e+1" "k", no slop
Post-series: "1.5e+1k" is parsed as "1" ".5" "e", slop "+1k"

If you call qemu_strtosz(,NULL,), both versions still give EINVAL
error (pre-patch: we detected an exponent in the middle portion of the
parse, which is not allowed; post-series: we detect slop, which is not
allowed).  If you call qemu_strtosz(,), the pre-patch version
fails with EINVAL but the new code succeeds.  But of all the callers,
the only one that passed non-NULL endptr did it's own check that *slop
is only whitespace ("+1k" fails that check), so the end result is that
at the high level, we reject "1.5e+1k" from the user, but the
rejection is now at a different point in the call stack.  The reason I
made the change is that it was less code to guarantee that the
internal qemu_strtod_finite() is passed a string that cannot possibly
contain an exponent, than to blindly call qemu_strtod_finite() on an
arbitrary suffix and then check after the fact on whether an exponent
was actually consumed as part of that parse.

For "0x1p1", the parse is "0x1" plus slop "p1" (unchanged by the
series).  This is rejected with EINVAL (the moment you have hex, we
insist on no suffix or slop, regardless of endptr being NULL or not).
I could have changed it similar to how I changed "1.5e+1k" to allow
the parse at the qemu_strtosz layer and then detect the slop at the
caller, but that was more lines of code and I didn't feel like it was
worth it.

Either way, the unit tests accurately cover the difference in parse
behavior, and I tried to make the documentation (by patch 11/11) be
consistent as well.  But since this is why we have a review process,
I'm open to feedback if you think I need to tweak which parse styles
we should be favoring.

> 
> (Similarly, "1.1.k" is also not parsed at all, but the problem there is not
> just two decimal points, but also that "1.1" would be an invalid size in
> itself, so it really shouldn’t be parsed at all.)

"1.1k" is a valid (if unusual) size.  This particular test addition
did not happen to improve coverage for my final code, but given that
qemu_strtod_finite(".1.k") would stop parsing after the ".1" and still
be pointing at '.', and our read-out-of-bounds was caused by assuming
that qemu_strtod_finite() failures leave *endptr == '.' (which was
invalidated for ".9e999" failing with ERANGE), it didn't hurt to add
the coverage.

> 
> I don’t think it matters to users, really, but I still wonder.

If any of our callers had actually done something like: "size %llx
followed by junk %s" with the parsed value and the junk string, then
it would matter.  But since all of the callers either passed in NULL
endptr (and got EINVAL before even knowing how much of the string was
parsed) or insisted that the junk be whitespace only, and did not
print details about the parsed value before reporting such errors,
you're right that I don't see this as mattering to end users.  But it
took me quite a bit of audit time to convince myself of that.

> > @@ -2875,6 +3056,20 @@ static void test_qemu_strtosz_trailing(void)
> >   err = qemu_strtosz(str, NULL, );
> >   g_assert_cmpint(err, ==, -EINVAL);
> >   g_assert_cmphex(res, ==, 0xbaadf00d);
> > +
> > +/* FIXME overflow in fraction is buggy */
> > +str = "1.5E999";
> > +endptr = NULL;
> > +res = 0xbaadf00d;
> > +err = qemu_strtosz(str, , );
> > +g_assert_cmpint(err, ==, 0);
> > +g_assert_cmpuint(res, ==, EiB /* FIXME EiB * 1.5 */);
> > +g_assert(endptr == str + 9 /* FIXME + 4 */);
> 
> This is “correct” (i.e. it’s the value we’ll get right now, which is the
> wrong one), but gcc complains that the array index is out of bounds
> (well...), which breaks the build.

Huh - wonder what build settings you are using that I wasn't for
seeing that warning - but it makes total sense that 'str + 9' would
trigger a build failure if we don't pad str with enough '\0' to keep
things in bounds.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [PATCH v2 3/3] pci: ROM preallocation for incoming migration

2023-05-09 Thread Michael S. Tsirkin

On Wed, May 03, 2023 at 02:39:15PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 03.05.23 13:05, Michael S. Tsirkin wrote:
> > On Wed, May 03, 2023 at 12:50:09PM +0300, Vladimir Sementsov-Ogievskiy 
> > wrote:
> > > On 03.05.23 12:20, David Hildenbrand wrote:
> > > > On 25.04.23 18:14, Vladimir Sementsov-Ogievskiy wrote:
> > > > > On incoming migration we have the following sequence to load option
> > > > > ROM:
> > > > > 
> > > > > 1. On device realize we do normal load ROM from the file
> > > > > 
> > > > > 2. Than, on incoming migration we rewrite ROM from the incoming RAM
> > > > >      block. If sizes mismatch we fail.
> > > > > 
> > > > > This is not ideal when we migrate to updated distribution: we have to
> > > > > keep old ROM files in new distribution and be careful around romfile
> > > > > property to load correct ROM file. Which is loaded actually just to
> > > > > allocate the ROM with correct length.
> > > > > 
> > > > > Note, that romsize property doesn't really help: if we try to specify
> > > > > it when default romfile is larger, it fails with something like:
> > > > > 
> > > > > romfile "efi-virtio.rom" (160768 bytes) is too large for ROM size 
> > > > > 65536
> > > > > 
> > > > > Let's just ignore ROM file when romsize is specified and we are in
> > > > > incoming migration state. In other words, we need only to preallocate
> > > > > ROM of specified size, local ROM file is unrelated.
> > > > > 
> > > > > This way:
> > > > > 
> > > > > If romsize was specified on source, we just use same commandline as on
> > > > > source, and migration will work independently of local ROM files on
> > > > > target.
> > > > > 
> > > > > If romsize was not specified on source (and we have mismatching local
> > > > > ROM file on target host), we have to specify romsize on target to 
> > > > > match
> > > > > source romsize. romfile parameter may be kept same as on source or may
> > > > > be dropped, the file is not loaded anyway.
> > > > > 
> > > > > As a bonus we avoid extra reading from ROM file on target.
> > > > > 
> > > > > Note: when we don't have romsize parameter on source command line and
> > > > > need it for target, it may be calculated as aligned up to power of two
> > > > > size of ROM file on source (if we know, which file is it) or,
> > > > > alternatively it may be retrieved from source QEMU by QMP qom-get
> > > > > command, like
> > > > > 
> > > > >     { "execute": "qom-get",
> > > > >   "arguments": {
> > > > >     "path": "/machine/peripheral/CARD_ID/virtio-net-pci.rom[0]",
> > > > >     "property": "size" } }
> > > > > 
> > > > > Suggested-by: Michael S. Tsirkin 
> > > > > Signed-off-by: Vladimir Sementsov-Ogievskiy 
> > > > > 
> > > > > ---
> > > > >    hw/pci/pci.c | 77 
> > > > > ++--
> > > > >    1 file changed, 45 insertions(+), 32 deletions(-)
> > > > > 
> > > > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > > > > index a442f8fce1..e2cab622e4 100644
> > > > > --- a/hw/pci/pci.c
> > > > > +++ b/hw/pci/pci.c
> > > > > @@ -36,6 +36,7 @@
> > > > >    #include "migration/vmstate.h"
> > > > >    #include "net/net.h"
> > > > >    #include "sysemu/numa.h"
> > > > > +#include "sysemu/runstate.h"
> > > > >    #include "sysemu/sysemu.h"
> > > > >    #include "hw/loader.h"
> > > > >    #include "qemu/error-report.h"
> > > > > @@ -2293,10 +2294,16 @@ static void pci_add_option_rom(PCIDevice 
> > > > > *pdev, bool is_default_rom,
> > > > >    {
> > > > >    int64_t size;
> > > > >    g_autofree char *path = NULL;
> > > > > -    void *ptr;
> > > > >    char name[32];
> > > > >    const VMStateDescription *vmsd;
> > > > > +    /*
> > > > > + * In case of incoming migration ROM will come with migration 
> > > > > stream, no
> > > > > + * reason to load the file.  Neither we want to fail if local 
> > > > > ROM file
> > > > > + * mismatches with specified romsize.
> > > > > + */
> > > > > +    bool load_file = !runstate_check(RUN_STATE_INMIGRATE);
> > > > > +
> > > > >    if (!pdev->romfile) {
> > > > >    return;
> > > > >    }
> > > > > @@ -2329,32 +2336,35 @@ static void pci_add_option_rom(PCIDevice 
> > > > > *pdev, bool is_default_rom,
> > > > >    return;
> > > > >    }
> > > > > -    path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
> > > > > -    if (path == NULL) {
> > > > > -    path = g_strdup(pdev->romfile);
> > > > > -    }
> > > > > +    if (load_file || pdev->romsize == -1) {
> > > > > +    path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
> > > > > +    if (path == NULL) {
> > > > > +    path = g_strdup(pdev->romfile);
> > > > > +    }
> > > > > -    size = get_image_size(path);
> > > > > -    if (size < 0) {
> > > > > -    error_setg(errp, "failed to find romfile \"%s\"", 
> > > > > pdev->romfile);
> > > > > -    return;
> > > > > -    } else if (size == 0) {
> > > > > -    error_setg(errp, "romfile \"%s\"

Re: [RFC PATCH 0/2] Make the core disassembler functions target-independent

2023-05-09 Thread Richard Henderson


On 5/8/23 15:04, Richard Henderson wrote:

On 5/8/23 14:37, Thomas Huth wrote:

Move disas.c into the target-independent source set, so that we
only have to compile this code once instead multiple times (one
time for each target).

Marked as RFC since we have to replace the target_ulongs here
with hwaddr, and the TARGET_FMT_lx with HWADDR_FMT_plx, which is
a little bit ugly ... what's your opinion?

Thomas Huth (2):
   disas: Move softmmu specific code to separate file
   disas: Move disas.c into the target-independent source set


Patches 79-83 from

https://patchew.org/QEMU/20230503072331.1747057-1-richard.hender...@linaro.org/

do the same thing, using uint64_t instead of hwaddr (it's not).


I've just realized that only the choice of hwaddr/uint64_t overlap; I failed to take this 
all of the way to building disas once.


I will integrate our two sets.


r~

Re: [PATCH 06/11] test-cutils: Add more coverage to qemu_strtosz

2023-05-09 Thread Eric Blake

On Tue, May 09, 2023 at 05:15:04PM +0200, Hanna Czenczek wrote:
> On 08.05.23 22:03, Eric Blake wrote:
> > Add some more strings that the user might send our way.  In
> > particular, some of these additions include FIXME comments showing
> > where our parser doesn't quite behave the way we want.
> > 
> > Signed-off-by: Eric Blake 
> > ---
> >   tests/unit/test-cutils.c | 226 +--
> >   1 file changed, 215 insertions(+), 11 deletions(-)
> > 
> > diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
> > index afae2ee5331..9fa6fb042e8 100644
> > --- a/tests/unit/test-cutils.c
> > +++ b/tests/unit/test-cutils.c
> 
> [...]
> 
> > @@ -2875,6 +3056,20 @@ static void test_qemu_strtosz_trailing(void)
> >   err = qemu_strtosz(str, NULL, );
> >   g_assert_cmpint(err, ==, -EINVAL);
> >   g_assert_cmphex(res, ==, 0xbaadf00d);
> > +
> > +/* FIXME overflow in fraction is buggy */
> > +str = "1.5E999";
> > +endptr = NULL;
> > +res = 0xbaadf00d;
> > +err = qemu_strtosz(str, , );
> > +g_assert_cmpint(err, ==, 0);
> > +g_assert_cmpuint(res, ==, EiB /* FIXME EiB * 1.5 */);
> > +g_assert(endptr == str + 9 /* FIXME + 4 */);
> > +
> > +res = 0xbaadf00d;
> > +err = qemu_strtosz(str, NULL, );
> > +g_assert_cmpint(err, ==, -EINVAL);
> > +g_assert_cmphex(res, ==, 0xbaadf00d);
> 
> Got it now!
> 
> Our problem is that `endptr` is beyond the end of the string, precisely as
> gcc complains.  The data there is undefined, and depending on the value in
> the g_assert_cmpuint() (which is converted to strings for the potential
> error message) it sometimes is "endptr == str + 9" (the one in the
> g_assert()) and sometimes isn’t.
> 
> If it is "endptr == str + 9", then the 'e' is taken as a suffix, which makes
> `res == EiB`, and `endptr == "ndptr == str + 9"`.
> 
> If it isn’t, well, it might be anything, so there often is no valid suffix,
> making `res == 1`.
> 
> So the solution is to set `str = "1.5E999\0\0"`, so we don’t get out of
> bounds and know exactly what will be parsed.  Then, at str[8] there is no
> valid suffix (it’s \0), so `res == 1` and `endptr == str + 8`.  This will
> then lead to the qemu_strtosz(str, NULL, ) below succeed, because, well,
> it’s a valid number.  I suppose it failed on your end because the
> out-of-bounds `str[9]` value was not '\0'.
> 
> That was a fun debugging session.

Wow, yeah, that's a mess.  The very test proving that we have a
read-out-of-bounds bug is super-sensitive to what is at that location.
Your hack of passing in extra \0 is awesome; I'll fold that in whether
we need a v2 for other reasons, or in-place if we can take the rest of
this series as-is.  It all goes away at the end of the series,
anyways, once the out-of-bounds read is fixed.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [PATCH v2 3/3] pci: ROM preallocation for incoming migration

2023-05-09 Thread Juan Quintela

"Michael S. Tsirkin"  wrote:
> On Tue, May 02, 2023 at 12:11:38PM +0200, Juan Quintela wrote:
>> "Michael S. Tsirkin"  wrote:
>> 
>> >> > CC pbonzini,dgilbert,quintela,armbru : guys, is poking at 
>> >> > runstate_check like
>> >> > this the right way to figure out we are not going to use the
>> >> > device locally before incoming migration will overwrite ROM contents?
>> >> 
>> >> RUN_STATE_INMIGRATE is set in the only one place in qemu_init() when
>> >> we parse cmdline option -incoming. VM is not running for sure. And
>> >> starting the VM comes with changing the state. So it's OK.
>> >> 
>> >> The possible problem, if we add netcard on target which we didn't
>> >> have on source. I now checked, this works.. But that doesn't seem
>> >> correct to add device that was not present on source - how would it
>> >> work - it's not guaranteed anyway.
>> >
>> > You can add it on source too while migration is in progress, no?
>> 
>> DeviceState *qdev_device_add_from_qdict(const QDict *opts,
>> bool from_json, Error **errp)
>> {
>> 
>> if (!migration_is_idle()) {
>> error_setg(errp, "device_add not allowed while migrating");
>> return NULL;
>> }
>> 
>> It should be similar for unplug.
>> 
>> We only support hotplug for some devices during migration, and we
>> shouldn't need any.
>> 
>> What I think he means is that you can add a device on the command line
>> on destination that don't exist on the source machine, and that will
>> confuse things.
>> 
>> In that case, I would say that the problem is that you are doing
>> something not supported.  You are expected that when you run migration
>> you use the same command line that on source, module whatever
>> hot[un]plug operations you have done before migration.
>> 
>> Anything else is not supported.
>> And for instance, if you are using libvirt, it will do the right thing.
>> 
>> Later, Juan.
>
> OK, so you ack this patch?

Reviewed-by: Juan Quintela 

It is ok, or should I do it at toplevel?

Later, Juan.

[PATCH v3 4/5] vdpa: return errno in vhost_vdpa_get_vring_group error

2023-05-09 Thread Eugenio Pérez

We need to tell in the caller, as some errors are expected in a normal
workflow.  In particular, parent drivers in recent kernels with
VHOST_BACKEND_F_IOTLB_ASID may not support vring groups.  In that case,
-ENOTSUP is returned.

This is the case of vp_vdpa in Linux 6.2.

Next patches in this series will use that information to know if it must
abort or not.  Also, next patches return properly an errp instead of
printing with error_report.

Reviewed-by: Stefano Garzarella 
Acked-by: Jason Wang 
Signed-off-by: Eugenio Pérez 
---
 net/vhost-vdpa.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 37cdc84562..3fb833fe76 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -362,6 +362,14 @@ static NetClientInfo net_vhost_vdpa_info = {
 .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
+/**
+ * Get vring virtqueue group
+ *
+ * @device_fd  vdpa device fd
+ * @vq_index   Virtqueue index
+ *
+ * Return -errno in case of error, or vq group if success.
+ */
 static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
 {
 struct vhost_vring_state state = {
@@ -370,6 +378,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, 
unsigned vq_index)
 int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, );
 
 if (unlikely(r < 0)) {
+r = -errno;
 error_report("Cannot get VQ %u group: %s", vq_index,
  g_strerror(errno));
 return r;
-- 
2.31.1

[PATCH v3 3/5] vdpa: add vhost_vdpa_set_dev_features_fd

2023-05-09 Thread Eugenio Pérez

This allows to set the features of a vhost-vdpa device from external
subsystems like vhost-net.  It is used in subsequent patches to
negotiate features and probe for CVQ ASID isolation.

Reviewed-by: Stefano Garzarella 
Acked-by: Jason Wang 
Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost-vdpa.h |  1 +
 hw/virtio/vhost-vdpa.c | 20 +---
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 28de7da91e..a9cb6f3a32 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -55,6 +55,7 @@ typedef struct vhost_vdpa {
 } VhostVDPA;
 
 void vhost_vdpa_reset_status_fd(int fd);
+int vhost_vdpa_set_dev_features_fd(int fd, uint64_t features);
 int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range 
*iova_range);
 
 int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 7a2053b8d9..acd5be46a9 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -651,11 +651,22 @@ static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
 return 0;
 }
 
+int vhost_vdpa_set_dev_features_fd(int fd, uint64_t features)
+{
+int ret;
+
+ret = vhost_vdpa_call_fd(fd, VHOST_SET_FEATURES, );
+if (ret) {
+return ret;
+}
+
+return vhost_vdpa_add_status_fd(fd, VIRTIO_CONFIG_S_FEATURES_OK);
+}
+
 static int vhost_vdpa_set_features(struct vhost_dev *dev,
uint64_t features)
 {
 struct vhost_vdpa *v = dev->opaque;
-int ret;
 
 if (!vhost_vdpa_first_dev(dev)) {
 return 0;
@@ -678,12 +689,7 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev,
 }
 
 trace_vhost_vdpa_set_features(dev, features);
-ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, );
-if (ret) {
-return ret;
-}
-
-return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
+return vhost_vdpa_set_dev_features_fd(vhost_vdpa_dev_fd(dev), features);
 }
 
 static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
-- 
2.31.1

Re: QEMU developers fortnightly call for agenda for 2023-05-16

2023-05-09 Thread Juan Quintela

Mark Burton  wrote:
> I’d appreciate an update on single binary.
> Also, What’s the status on the “icount” plugin ?

Annotated.

BTW, if people are interested I can expose the "idea" of all the
migration patches going on the tree.

Later, Juan.

[PATCH v3 2/5] vdpa: add vhost_vdpa_reset_status_fd

2023-05-09 Thread Eugenio Pérez

This allows to reset a vhost-vdpa device from external subsystems like
vhost-net, since it does not have any struct vhost_dev by the time we
need to use it.

It is used in subsequent patches to negotiate features
and probe for CVQ ASID isolation.

Reviewed-by: Stefano Garzarella 
Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost-vdpa.h |  1 +
 hw/virtio/vhost-vdpa.c | 58 +++---
 2 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index c278a2a8de..28de7da91e 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -54,6 +54,7 @@ typedef struct vhost_vdpa {
 VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
 
+void vhost_vdpa_reset_status_fd(int fd);
 int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range 
*iova_range);
 
 int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index bbabea18f3..7a2053b8d9 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -335,38 +335,45 @@ static const MemoryListener vhost_vdpa_memory_listener = {
 .region_del = vhost_vdpa_listener_region_del,
 };
 
-static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
- void *arg)
+static int vhost_vdpa_dev_fd(const struct vhost_dev *dev)
 {
 struct vhost_vdpa *v = dev->opaque;
-int fd = v->device_fd;
-int ret;
 
 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
+return v->device_fd;
+}
+
+static int vhost_vdpa_call_fd(int fd, unsigned long int request, void *arg)
+{
+int ret = ioctl(fd, request, arg);
 
-ret = ioctl(fd, request, arg);
 return ret < 0 ? -errno : ret;
 }
 
-static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
+static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
+   void *arg)
+{
+return vhost_vdpa_call_fd(vhost_vdpa_dev_fd(dev), request, arg);
+}
+
+static int vhost_vdpa_add_status_fd(int fd, uint8_t status)
 {
 uint8_t s;
 int ret;
 
-trace_vhost_vdpa_add_status(dev, status);
-ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, );
+ret = vhost_vdpa_call_fd(fd, VHOST_VDPA_GET_STATUS, );
 if (ret < 0) {
 return ret;
 }
 
 s |= status;
 
-ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, );
+ret = vhost_vdpa_call_fd(fd, VHOST_VDPA_SET_STATUS, );
 if (ret < 0) {
 return ret;
 }
 
-ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, );
+ret = vhost_vdpa_call_fd(fd, VHOST_VDPA_GET_STATUS, );
 if (ret < 0) {
 return ret;
 }
@@ -378,6 +385,12 @@ static int vhost_vdpa_add_status(struct vhost_dev *dev, 
uint8_t status)
 return 0;
 }
 
+static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
+{
+trace_vhost_vdpa_add_status(dev, status);
+return vhost_vdpa_add_status_fd(vhost_vdpa_dev_fd(dev), status);
+}
+
 int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range)
 {
 int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
@@ -709,16 +722,20 @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
 return ret;
 }
 
+static int vhost_vdpa_reset_device_fd(int fd)
+{
+uint8_t status = 0;
+
+return vhost_vdpa_call_fd(fd, VHOST_VDPA_SET_STATUS, );
+}
+
 static int vhost_vdpa_reset_device(struct vhost_dev *dev)
 {
 struct vhost_vdpa *v = dev->opaque;
-int ret;
-uint8_t status = 0;
 
-ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, );
-trace_vhost_vdpa_reset_device(dev);
 v->suspended = false;
-return ret;
+trace_vhost_vdpa_reset_device(dev);
+return vhost_vdpa_reset_device_fd(vhost_vdpa_dev_fd(dev));
 }
 
 static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
@@ -1170,6 +1187,13 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, 
bool started)
 return 0;
 }
 
+void vhost_vdpa_reset_status_fd(int fd)
+{
+vhost_vdpa_reset_device_fd(fd);
+vhost_vdpa_add_status_fd(fd, VIRTIO_CONFIG_S_ACKNOWLEDGE |
+ VIRTIO_CONFIG_S_DRIVER);
+}
+
 static void vhost_vdpa_reset_status(struct vhost_dev *dev)
 {
 struct vhost_vdpa *v = dev->opaque;
@@ -1178,9 +1202,7 @@ static void vhost_vdpa_reset_status(struct vhost_dev *dev)
 return;
 }
 
-vhost_vdpa_reset_device(dev);
-vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
-   VIRTIO_CONFIG_S_DRIVER);
+vhost_vdpa_reset_status_fd(vhost_vdpa_dev_fd(dev));
 memory_listener_unregister(>listener);
 }
 
-- 
2.31.1

[PATCH v3 1/5] vdpa: Remove status in reset tracing

2023-05-09 Thread Eugenio Pérez

It is always 0 and it is not useful to route call through file
descriptor.

Reviewed-by: Stefano Garzarella 
Acked-by: Jason Wang 
Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-vdpa.c | 2 +-
 hw/virtio/trace-events | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index bc6bad23d5..bbabea18f3 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -716,7 +716,7 @@ static int vhost_vdpa_reset_device(struct vhost_dev *dev)
 uint8_t status = 0;
 
 ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, );
-trace_vhost_vdpa_reset_device(dev, status);
+trace_vhost_vdpa_reset_device(dev);
 v->suspended = false;
 return ret;
 }
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 8f8d05cf9b..6265231683 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -44,7 +44,7 @@ vhost_vdpa_set_mem_table(void *dev, uint32_t nregions, 
uint32_t padding) "dev: %
 vhost_vdpa_dump_regions(void *dev, int i, uint64_t guest_phys_addr, uint64_t 
memory_size, uint64_t userspace_addr, uint64_t flags_padding) "dev: %p %d: 
guest_phys_addr: 0x%"PRIx64" memory_size: 0x%"PRIx64" userspace_addr: 
0x%"PRIx64" flags_padding: 0x%"PRIx64
 vhost_vdpa_set_features(void *dev, uint64_t features) "dev: %p features: 
0x%"PRIx64
 vhost_vdpa_get_device_id(void *dev, uint32_t device_id) "dev: %p device_id 
%"PRIu32
-vhost_vdpa_reset_device(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8
+vhost_vdpa_reset_device(void *dev) "dev: %p"
 vhost_vdpa_get_vq_index(void *dev, int idx, int vq_idx) "dev: %p idx: %d vq 
idx: %d"
 vhost_vdpa_set_vring_ready(void *dev) "dev: %p"
 vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s"
-- 
2.31.1

[PATCH v3 0/5] Move ASID test to vhost-vdpa net initialization

2023-05-09 Thread Eugenio Pérez

QEMU v8.0 is able to switch dynamically between vhost-vdpa passthrough
and SVQ mode as long as the net device does not have CVQ.  The net device
state followed (and migrated) by CVQ requires special care.

A pre-requisite to add CVQ to that framework is to determine if devices with
CVQ are migratable or not at initialization time.  The solution to it is to
always shadow only CVQ, and vq groups and ASID are used for that.

However, current qemu version only checks ASID at device start (as "driver set
DRIVER_OK status bit"), not at device initialization.  A check at
initialization time is required.  Otherwise, the guest would be able to set
and remove migration blockers at will [1].

This series is a requisite for migration of vhost-vdpa net devices with CVQ.
However it already makes sense by its own, as it reduces the number of ioctls
at migration time, decreasing the error paths there.

[1] 
https://lore.kernel.org/qemu-devel/2616f0cd-f9e8-d183-ea78-db1be4825...@redhat.com/
---
v3:
* Only record cvq_isolated, true if the device have cvq isolated in both !MQ
* and MQ configurations.
* Drop the cache of cvq group, it can be done on top

v2:
* Take out the reset of the device from vhost_vdpa_cvq_is_isolated
  (reported by Lei Yang).
* Expand patch messages by Stefano G. questions.

Eugenio Pérez (5):
  vdpa: Remove status in reset tracing
  vdpa: add vhost_vdpa_reset_status_fd
  vdpa: add vhost_vdpa_set_dev_features_fd
  vdpa: return errno in vhost_vdpa_get_vring_group error
  vdpa: move CVQ isolation check to net_init_vhost_vdpa

 include/hw/virtio/vhost-vdpa.h |   2 +
 hw/virtio/vhost-vdpa.c |  78 ++-
 net/vhost-vdpa.c   | 171 ++---
 hw/virtio/trace-events |   2 +-
 4 files changed, 192 insertions(+), 61 deletions(-)

-- 
2.31.1

[PATCH v3 5/5] vdpa: move CVQ isolation check to net_init_vhost_vdpa

2023-05-09 Thread Eugenio Pérez

Evaluating it at start time instead of initialization time may make the
guest capable of dynamically adding or removing migration blockers.

Also, moving to initialization reduces the number of ioctls in the
migration, reducing failure possibilities.

As a drawback we need to check for CVQ isolation twice: one time with no
MQ negotiated and another one acking it, as long as the device supports
it.  This is because Vring ASID / group management is based on vq
indexes, but we don't know the index of CVQ before negotiating MQ.

Signed-off-by: Eugenio Pérez 
---
v2: Take out the reset of the device from vhost_vdpa_cvq_is_isolated
v3: Only record cvq_isolated, true if the device have cvq isolated in
both !MQ and MQ configurations.
---
 net/vhost-vdpa.c | 178 +++
 1 file changed, 135 insertions(+), 43 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 3fb833fe76..29054b77a9 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -43,6 +43,10 @@ typedef struct VhostVDPAState {
 
 /* The device always have SVQ enabled */
 bool always_svq;
+
+/* The device can isolate CVQ in its own ASID */
+bool cvq_isolated;
+
 bool started;
 } VhostVDPAState;
 
@@ -362,15 +366,8 @@ static NetClientInfo net_vhost_vdpa_info = {
 .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
-/**
- * Get vring virtqueue group
- *
- * @device_fd  vdpa device fd
- * @vq_index   Virtqueue index
- *
- * Return -errno in case of error, or vq group if success.
- */
-static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
+static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index,
+  Error **errp)
 {
 struct vhost_vring_state state = {
 .index = vq_index,
@@ -379,8 +376,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, 
unsigned vq_index)
 
 if (unlikely(r < 0)) {
 r = -errno;
-error_report("Cannot get VQ %u group: %s", vq_index,
- g_strerror(errno));
+error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index);
 return r;
 }
 
@@ -480,9 +476,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
 {
 VhostVDPAState *s, *s0;
 struct vhost_vdpa *v;
-uint64_t backend_features;
 int64_t cvq_group;
-int cvq_index, r;
+int r;
+Error *err = NULL;
 
 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 
@@ -502,41 +498,22 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
 /*
  * If we early return in these cases SVQ will not be enabled. The migration
  * will be blocked as long as vhost-vdpa backends will not offer _F_LOG.
- *
- * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev
- * yet.
  */
-r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, _features);
-if (unlikely(r < 0)) {
-error_report("Cannot get vdpa backend_features: %s(%d)",
-g_strerror(errno), errno);
-return -1;
+if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
+return 0;
 }
-if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
-!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
+
+if (!s->cvq_isolated) {
 return 0;
 }
 
-/*
- * Check if all the virtqueues of the virtio device are in a different vq
- * than the last vq. VQ group of last group passed in cvq_group.
- */
-cvq_index = v->dev->vq_index_end - 1;
-cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index);
+cvq_group = vhost_vdpa_get_vring_group(v->device_fd,
+   v->dev->vq_index_end - 1,
+   );
 if (unlikely(cvq_group < 0)) {
+error_report_err(err);
 return cvq_group;
 }
-for (int i = 0; i < cvq_index; ++i) {
-int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i);
-
-if (unlikely(group < 0)) {
-return group;
-}
-
-if (group == cvq_group) {
-return 0;
-}
-}
 
 r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
 if (unlikely(r < 0)) {
@@ -799,6 +776,111 @@ static const VhostShadowVirtqueueOps 
vhost_vdpa_net_svq_ops = {
 .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
 };
 
+/**
+ * Probe the device to check control virtqueue is isolated.
+ *
+ * @device_fd vhost-vdpa file descriptor
+ * @features features to negotiate
+ * @cvq_index Control vq index
+ *
+ * Returns -1 in case of error, 0 if false and 1 if true
+ */
+static int vhost_vdpa_cvq_is_isolated(int device_fd, uint64_t features,
+  unsigned cvq_index, Error **errp)
+{
+int64_t cvq_group;
+int r;
+
+r = vhost_vdpa_set_dev_features_fd(device_fd, features);
+if (unlikely(r < 0)) {
+

Re: [PATCH 0/4] vhost-user-fs: Internal migration

2023-05-09 Thread Eugenio Perez Martin

On Tue, May 9, 2023 at 5:30 PM Stefan Hajnoczi  wrote:
>
> On Fri, May 05, 2023 at 04:26:08PM +0200, Eugenio Perez Martin wrote:
> > On Fri, May 5, 2023 at 11:51 AM Hanna Czenczek  wrote:
> > >
> > > (By the way, thanks for the explanations :))
> > >
> > > On 05.05.23 11:03, Hanna Czenczek wrote:
> > > > On 04.05.23 23:14, Stefan Hajnoczi wrote:
> > >
> > > [...]
> > >
> > > >> I think it's better to change QEMU's vhost code
> > > >> to leave stateful devices suspended (but not reset) across
> > > >> vhost_dev_stop() -> vhost_dev_start(), maybe by introducing
> > > >> vhost_dev_suspend() and vhost_dev_resume(). Have you thought about
> > > >> this aspect?
> > > >
> > > > Yes and no; I mean, I haven’t in detail, but I thought this is what’s
> > > > meant by suspending instead of resetting when the VM is stopped.
> > >
> > > So, now looking at vhost_dev_stop(), one problem I can see is that
> > > depending on the back-end, different operations it does will do
> > > different things.
> > >
> > > It tries to stop the whole device via vhost_ops->vhost_dev_start(),
> > > which for vDPA will suspend the device, but for vhost-user will reset it
> > > (if F_STATUS is there).
> > >
> > > It disables all vrings, which doesn’t mean stopping, but may be
> > > necessary, too.  (I haven’t yet really understood the use of disabled
> > > vrings, I heard that virtio-net would have a need for it.)
> > >
> > > It then also stops all vrings, though, so that’s OK.  And because this
> > > will always do GET_VRING_BASE, this is actually always the same
> > > regardless of transport.
> > >
> > > Finally (for this purpose), it resets the device status via
> > > vhost_ops->vhost_reset_status().  This is only implemented on vDPA, and
> > > this is what resets the device there.
> > >
> > >
> > > So vhost-user resets the device in .vhost_dev_start, but vDPA only does
> > > so in .vhost_reset_status.  It would seem better to me if vhost-user
> > > would also reset the device only in .vhost_reset_status, not in
> > > .vhost_dev_start.  .vhost_dev_start seems precisely like the place to
> > > run SUSPEND/RESUME.
> > >
> >
> > I think the same. I just saw It's been proposed at [1].
> >
> > > Another question I have (but this is basically what I wrote in my last
> > > email) is why we even call .vhost_reset_status here.  If the device
> > > and/or all of the vrings are already stopped, why do we need to reset
> > > it?  Naïvely, I had assumed we only really need to reset the device if
> > > the guest changes, so that a new guest driver sees a freshly initialized
> > > device.
> > >
> >
> > I don't know why we didn't need to call it :). I'm assuming the
> > previous vhost-user net did fine resetting vq indexes, using
> > VHOST_USER_SET_VRING_BASE. But I don't know about more complex
> > devices.
>
> It was added so DPDK can batch rx virtqueue RSS updates:
>
> commit 923b8921d210763359e96246a58658ac0db6c645
> Author: Yajun Wu 
> Date:   Mon Oct 17 14:44:52 2022 +0800
>
> vhost-user: Support vhost_dev_start
>
> The motivation of adding vhost-user vhost_dev_start support is to
> improve backend configuration speed and reduce live migration VM
> downtime.
>
> Today VQ configuration is issued one by one. For virtio net with
> multi-queue support, backend needs to update RSS (Receive side
> scaling) on every rx queue enable. Updating RSS is time-consuming
> (typical time like 7ms).
>
> Implement already defined vhost status and message in the vhost
> specification [1].
> (a) VHOST_USER_PROTOCOL_F_STATUS
> (b) VHOST_USER_SET_STATUS
> (c) VHOST_USER_GET_STATUS
>
> Send message VHOST_USER_SET_STATUS with VIRTIO_CONFIG_S_DRIVER_OK for
> device start and reset(0) for device stop.
>
> On reception of the DRIVER_OK message, backend can apply the needed 
> setting
> only once (instead of incremental) and also utilize parallelism on 
> enabling
> queues.
>
> This improves QEMU's live migration downtime with vhost user backend
> implementation by great margin, specially for the large number of VQs of 
> 64
> from 800 msec to 250 msec.
>
> [1] https://qemu-project.gitlab.io/qemu/interop/vhost-user.html
>
> Signed-off-by: Yajun Wu 
> Acked-by: Parav Pandit 
> Message-Id: <20221017064452.1226514-3-yaj...@nvidia.com>
> Reviewed-by: Michael S. Tsirkin 
> Signed-off-by: Michael S. Tsirkin 
>

Sorry for the confusion, what I was wondering is how vhost-user
devices do not need any signal to reset the device before
VHOST_USER_SET_STATUS. And my guess is that it is enough to get / set
the vq indexes.

vhost_user_reset_device is limited to scsi, so it would not work for
the rest of devices.

Thanks!

Re: [PATCH 0/4] vhost-user-fs: Internal migration

2023-05-09 Thread Stefan Hajnoczi

On Fri, May 05, 2023 at 11:03:16AM +0200, Hanna Czenczek wrote:
> On 04.05.23 23:14, Stefan Hajnoczi wrote:
> > On Thu, 4 May 2023 at 13:39, Hanna Czenczek  wrote:
> > > On 11.04.23 17:05, Hanna Czenczek wrote:
> > > 
> > > [...]
> > > 
> > > > Hanna Czenczek (4):
> > > > vhost: Re-enable vrings after setting features
> > > > vhost-user: Interface for migration state transfer
> > > > vhost: Add high-level state save/load functions
> > > > vhost-user-fs: Implement internal migration
> > > I’m trying to write v2, and my intention was to keep the code
> > > conceptually largely the same, but include in the documentation change
> > > thoughts and notes on how this interface is to be used in the future,
> > > when e.g. vDPA “extensions” come over to vhost-user.  My plan was to,
> > > based on that documentation, discuss further.
> > > 
> > > But now I’m struggling to even write that documentation because it’s not
> > > clear to me what exactly the result of the discussion was, so I need to
> > > stop even before that.
> > > 
> > > So as far as I understand, we need/want SUSPEND/RESUME for two reasons:
> > > 1. As a signal to the back-end when virt queues are no longer to be
> > > processed, so that it is clear that it will not do that when asked for
> > > migration state.
> > > 2. Stateful devices that support SET_STATUS receive a status of 0 when
> > > the VM is stopped, which supposedly resets the internal state. While
> > > suspended, device state is frozen, so as far as I understand, SUSPEND
> > > before SET_STATUS would have the status change be deferred until RESUME.
> > I'm not sure about SUSPEND -> SET_STATUS 0 -> RESUME. I guess the
> > device would be reset right away and it would either remain suspended
> > or be resumed as part of reset :).
> > 
> > Unfortunately the concepts of SUSPEND/RESUME and the Device Status
> > Field are orthogonal and there is no spec that explains how they
> > interact.
> 
> Ah, OK.  So I guess it’s up to the implementation to decide whether the
> virtio device status counts as part of the “configuration” that “[it] must
> not change”.
> 
> > > I don’t want to hang myself up on 2 because it doesn’t really seem
> > > important to this series, but: Why does a status of 0 reset the internal
> > > state?  [Note: This is all virtio_reset() seems to do, set the status to
> > > 0.]  The vhost-user specification only points to the virtio
> > > specification, which doesn’t say anything to that effect. Instead, an
> > > explicit device reset is mentioned, which would be
> > > VHOST_USER_RESET_DEVICE, i.e. something completely different. Because
> > > RESET_DEVICE directly contradicts SUSPEND’s description, I would like to
> > > think that invoking RESET_DEVICE on a SUSPEND-ed device is just invalid.
> > The vhost-user protocol didn't have the concept of the VIRTIO Device
> > Status Field until SET_STATUS was added.
> > 
> > In order to participate in the VIRTIO device lifecycle to some extent,
> > the pre-SET_STATUS vhost-user protocol relied on vhost-user-specific
> > messages like RESET_DEVICE.
> > 
> > At the VIRTIO level, devices are reset by setting the Device Status
> > Field to 0.
> 
> (I didn’t find this in the virtio specification until today, turns out it’s
> under 4.1.4.3 “Common configuration structure layout”, not under 2.1 “Device
> Status Field”, where I was looking.)
> 
> > All state is lost and the Device Initialization process
> > must be followed to make the device operational again.
> > 
> > Existing vhost-user backends don't implement SET_STATUS 0 (it's new).
> > 
> > It's messy and not your fault. I think QEMU should solve this by
> > treating stateful devices differently from non-stateful devices. That
> > way existing vhost-user backends continue to work and new stateful
> > devices can also be supported.
> 
> It’s my understanding that SET_STATUS 0/RESET_DEVICE is problematic for
> stateful devices.  In a previous email, you wrote that these should
> implement SUSPEND+RESUME so qemu can use those instead.  But those are
> separate things, so I assume we just use SET_STATUS 0 when stopping the VM
> because this happens to also stop processing vrings as a side effect?

SET_STATUS 0 doesn't do anything in most existing vhost-user backends
and QEMU's vhost code doesn't rely on it doing anything. It was added as
an optimization hint for DPDK's vhost-user-net implementation without
noticing that it breaks stateful devices (see commit 923b8921d210).

> 
> I.e. I understand “treating stateful devices differently” to mean that qemu
> should use SUSPEND+RESUME instead of SET_STATUS 0 when the back-end supports
> it, and stateful back-ends should support it.
> 
> > > Is it that a status 0 won’t explicitly reset the internal state, but
> > > because it does mean that the driver is unbound, the state should
> > > implicitly be reset?
> > I think the fundamental problem is that transports like virtio-pci put
> > registers back in their initialization

Re: [PATCH] hw/mips/malta: Fix minor dead code issue

2023-05-09 Thread Philippe Mathieu-Daudé


On 9/5/23 15:07, Peter Maydell wrote:

On Thu, 6 Apr 2023 at 16:54, Philippe Mathieu-Daudé  wrote:


On 6/4/23 17:37, Peter Maydell wrote:

Coverity points out (in CID 1508390) that write_bootloader has
some dead code, where we assign to 'p' and then in the following
line assign to it again. This happened as a result of the
refactoring in commit cd5066f8618b.

Fix the dead code by removing the 'void *v' variable entirely and
instead adding a cast when calling bl_setup_gt64120_jump_kernel(), as
we do at its other callsite in write_bootloader_nanomips().

Signed-off-by: Peter Maydell 
---
   hw/mips/malta.c | 5 +
   1 file changed, 1 insertion(+), 4 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 


Are you planning to take this into a mips pullreq?
If not, I can throw it into my next target-arm series.


I'll appreciate if you can take this single patch via your
arm tree. Thanks!

Re: [PATCH] migration: Attempt disk reactivation in more failure scenarios

2023-05-09 Thread Juan Quintela

Eric Blake  wrote:
> Commit fe904ea824 added a fail_inactivate label, which tries to
> reactivate disks on the source after a failure while s->state ==
> MIGRATION_STATUS_ACTIVE, but didn't actually use the label if
> qemu_savevm_state_complete_precopy() failed.  This failure to
> reactivate is also present in commit 6039dd5b1c (also covering the new
> s->state == MIGRATION_STATUS_DEVICE state) and 403d18ae (ensuring
> s->block_inactive is set more reliably).
>
> Consolidate the two labels back into one - no matter HOW migration is
> failed, if there is any chance we can reach vm_start() after having
> attempted inactivation, it is essential that we have tried to restart
> disks before then.  This also makes the cleanup more like
> migrate_fd_cancel().
>
> Suggested-by: Kevin Wolf 
> Signed-off-by: Eric Blake 

Reviewed-by: Juan Quintela 

I still can't believe that power down disks and decide if restart (or
not) the vm is such a complicated bussiness.  Sniff.

Re: [PATCH 2/4] vhost-user: Interface for migration state transfer

2023-05-09 Thread Eugenio Perez Martin

On Tue, May 9, 2023 at 5:09 PM Stefan Hajnoczi  wrote:
>
> On Tue, May 09, 2023 at 08:45:33AM +0200, Eugenio Perez Martin wrote:
> > On Mon, May 8, 2023 at 10:10 PM Stefan Hajnoczi  wrote:
> > >
> > > On Thu, Apr 20, 2023 at 03:29:44PM +0200, Eugenio Pérez wrote:
> > > > On Wed, 2023-04-19 at 07:21 -0400, Stefan Hajnoczi wrote:
> > > > > On Wed, 19 Apr 2023 at 07:10, Hanna Czenczek  
> > > > > wrote:
> > > > > > On 18.04.23 09:54, Eugenio Perez Martin wrote:
> > > > > > > On Mon, Apr 17, 2023 at 9:21 PM Stefan Hajnoczi 
> > > > > > > 
> > > > > > > wrote:
> > > > > > > > On Mon, 17 Apr 2023 at 15:08, Eugenio Perez Martin 
> > > > > > > > 
> > > > > > > > wrote:
> > > > > > > > > On Mon, Apr 17, 2023 at 7:14 PM Stefan Hajnoczi 
> > > > > > > > > 
> > > > > > > > > wrote:
> > > > > > > > > > On Thu, Apr 13, 2023 at 12:14:24PM +0200, Eugenio Perez 
> > > > > > > > > > Martin
> > > > > > > > > > wrote:
> > > > > > > > > > > On Wed, Apr 12, 2023 at 11:06 PM Stefan Hajnoczi <
> > > > > > > > > > > stefa...@redhat.com> wrote:
> > > > > > > > > > > > On Tue, Apr 11, 2023 at 05:05:13PM +0200, Hanna 
> > > > > > > > > > > > Czenczek wrote:
> > > > > > > > > > > > > So-called "internal" virtio-fs migration refers to
> > > > > > > > > > > > > transporting the
> > > > > > > > > > > > > back-end's (virtiofsd's) state through qemu's 
> > > > > > > > > > > > > migration
> > > > > > > > > > > > > stream.  To do
> > > > > > > > > > > > > this, we need to be able to transfer virtiofsd's 
> > > > > > > > > > > > > internal
> > > > > > > > > > > > > state to and
> > > > > > > > > > > > > from virtiofsd.
> > > > > > > > > > > > >
> > > > > > > > > > > > > Because virtiofsd's internal state will not be too 
> > > > > > > > > > > > > large, we
> > > > > > > > > > > > > believe it
> > > > > > > > > > > > > is best to transfer it as a single binary blob after 
> > > > > > > > > > > > > the
> > > > > > > > > > > > > streaming
> > > > > > > > > > > > > phase.  Because this method should be useful to other 
> > > > > > > > > > > > > vhost-
> > > > > > > > > > > > > user
> > > > > > > > > > > > > implementations, too, it is introduced as a 
> > > > > > > > > > > > > general-purpose
> > > > > > > > > > > > > addition to
> > > > > > > > > > > > > the protocol, not limited to vhost-user-fs.
> > > > > > > > > > > > >
> > > > > > > > > > > > > These are the additions to the protocol:
> > > > > > > > > > > > > - New vhost-user protocol feature
> > > > > > > > > > > > > VHOST_USER_PROTOCOL_F_MIGRATORY_STATE:
> > > > > > > > > > > > >This feature signals support for transferring 
> > > > > > > > > > > > > state, and is
> > > > > > > > > > > > > added so
> > > > > > > > > > > > >that migration can fail early when the back-end 
> > > > > > > > > > > > > has no
> > > > > > > > > > > > > support.
> > > > > > > > > > > > >
> > > > > > > > > > > > > - SET_DEVICE_STATE_FD function: Front-end and back-end
> > > > > > > > > > > > > negotiate a pipe
> > > > > > > > > > > > >over which to transfer the state.  The front-end 
> > > > > > > > > > > > > sends an
> > > > > > > > > > > > > FD to the
> > > > > > > > > > > > >back-end into/from which it can write/read its 
> > > > > > > > > > > > > state, and
> > > > > > > > > > > > > the back-end
> > > > > > > > > > > > >can decide to either use it, or reply with a 
> > > > > > > > > > > > > different FD
> > > > > > > > > > > > > for the
> > > > > > > > > > > > >front-end to override the front-end's choice.
> > > > > > > > > > > > >The front-end creates a simple pipe to transfer 
> > > > > > > > > > > > > the state,
> > > > > > > > > > > > > but maybe
> > > > > > > > > > > > >the back-end already has an FD into/from which it 
> > > > > > > > > > > > > has to
> > > > > > > > > > > > > write/read
> > > > > > > > > > > > >its state, in which case it will want to override 
> > > > > > > > > > > > > the
> > > > > > > > > > > > > simple pipe.
> > > > > > > > > > > > >Conversely, maybe in the future we find a way to 
> > > > > > > > > > > > > have the
> > > > > > > > > > > > > front-end
> > > > > > > > > > > > >get an immediate FD for the migration stream (in 
> > > > > > > > > > > > > some
> > > > > > > > > > > > > cases), in which
> > > > > > > > > > > > >case we will want to send this to the back-end 
> > > > > > > > > > > > > instead of
> > > > > > > > > > > > > creating a
> > > > > > > > > > > > >pipe.
> > > > > > > > > > > > >Hence the negotiation: If one side has a better 
> > > > > > > > > > > > > idea than a
> > > > > > > > > > > > > plain
> > > > > > > > > > > > >pipe, we will want to use that.
> > > > > > > > > > > > >
> > > > > > > > > > > > > - CHECK_DEVICE_STATE: After the state has been 
> > > > > > > > > > > > > transferred
> > > > > > > > > > > > > through the
> > > > > > > > > > > > >pipe (the end indicated by EOF), the front-end 
> > > > > > > > > > > > > invokes this
> > > > > > > > > > > > > function
> > > > > > > > > > > > >to

Re: [PATCH v1 1/1] hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0

2023-05-09 Thread Juan Quintela

Leonardo Bras  wrote:
> Since it's implementation on v8.0.0-rc0, having the PCI_ERR_UNCOR_MASK
> set for machine types < 8.0 will cause migration to fail if the target
> QEMU version is < 8.0.0 :
>
> qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 40 
> device: 0 cmask: ff wmask: 0 w1cmask:0
> qemu-system-x86_64: Failed to load PCIDevice:config
> qemu-system-x86_64: Failed to load e1000e:parent_obj
> qemu-system-x86_64: error while loading state for instance 0x0 of device 
> ':00:02.0/e1000e'
> qemu-system-x86_64: load of migration failed: Invalid argument
>
> The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0,
> with this cmdline:
>
> ./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX]
>
> In order to fix this, property x-pcie-err-unc-mask was introduced to
> control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by
> default, but is disabled if machine type <= 7.2.
>
> Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register")
> Suggested-by: Michael S. Tsirkin 
> Signed-off-by: Leonardo Bras 

Reviewed-by: Juan Quintela

Re: [PATCH 0/4] vhost-user-fs: Internal migration

2023-05-09 Thread Stefan Hajnoczi

On Fri, May 05, 2023 at 04:26:08PM +0200, Eugenio Perez Martin wrote:
> On Fri, May 5, 2023 at 11:51 AM Hanna Czenczek  wrote:
> >
> > (By the way, thanks for the explanations :))
> >
> > On 05.05.23 11:03, Hanna Czenczek wrote:
> > > On 04.05.23 23:14, Stefan Hajnoczi wrote:
> >
> > [...]
> >
> > >> I think it's better to change QEMU's vhost code
> > >> to leave stateful devices suspended (but not reset) across
> > >> vhost_dev_stop() -> vhost_dev_start(), maybe by introducing
> > >> vhost_dev_suspend() and vhost_dev_resume(). Have you thought about
> > >> this aspect?
> > >
> > > Yes and no; I mean, I haven’t in detail, but I thought this is what’s
> > > meant by suspending instead of resetting when the VM is stopped.
> >
> > So, now looking at vhost_dev_stop(), one problem I can see is that
> > depending on the back-end, different operations it does will do
> > different things.
> >
> > It tries to stop the whole device via vhost_ops->vhost_dev_start(),
> > which for vDPA will suspend the device, but for vhost-user will reset it
> > (if F_STATUS is there).
> >
> > It disables all vrings, which doesn’t mean stopping, but may be
> > necessary, too.  (I haven’t yet really understood the use of disabled
> > vrings, I heard that virtio-net would have a need for it.)
> >
> > It then also stops all vrings, though, so that’s OK.  And because this
> > will always do GET_VRING_BASE, this is actually always the same
> > regardless of transport.
> >
> > Finally (for this purpose), it resets the device status via
> > vhost_ops->vhost_reset_status().  This is only implemented on vDPA, and
> > this is what resets the device there.
> >
> >
> > So vhost-user resets the device in .vhost_dev_start, but vDPA only does
> > so in .vhost_reset_status.  It would seem better to me if vhost-user
> > would also reset the device only in .vhost_reset_status, not in
> > .vhost_dev_start.  .vhost_dev_start seems precisely like the place to
> > run SUSPEND/RESUME.
> >
> 
> I think the same. I just saw It's been proposed at [1].
> 
> > Another question I have (but this is basically what I wrote in my last
> > email) is why we even call .vhost_reset_status here.  If the device
> > and/or all of the vrings are already stopped, why do we need to reset
> > it?  Naïvely, I had assumed we only really need to reset the device if
> > the guest changes, so that a new guest driver sees a freshly initialized
> > device.
> >
> 
> I don't know why we didn't need to call it :). I'm assuming the
> previous vhost-user net did fine resetting vq indexes, using
> VHOST_USER_SET_VRING_BASE. But I don't know about more complex
> devices.

It was added so DPDK can batch rx virtqueue RSS updates:

commit 923b8921d210763359e96246a58658ac0db6c645
Author: Yajun Wu 
Date:   Mon Oct 17 14:44:52 2022 +0800

vhost-user: Support vhost_dev_start

The motivation of adding vhost-user vhost_dev_start support is to
improve backend configuration speed and reduce live migration VM
downtime.

Today VQ configuration is issued one by one. For virtio net with
multi-queue support, backend needs to update RSS (Receive side
scaling) on every rx queue enable. Updating RSS is time-consuming
(typical time like 7ms).

Implement already defined vhost status and message in the vhost
specification [1].
(a) VHOST_USER_PROTOCOL_F_STATUS
(b) VHOST_USER_SET_STATUS
(c) VHOST_USER_GET_STATUS

Send message VHOST_USER_SET_STATUS with VIRTIO_CONFIG_S_DRIVER_OK for
device start and reset(0) for device stop.

On reception of the DRIVER_OK message, backend can apply the needed setting
only once (instead of incremental) and also utilize parallelism on enabling
queues.

This improves QEMU's live migration downtime with vhost user backend
implementation by great margin, specially for the large number of VQs of 64
from 800 msec to 250 msec.

[1] https://qemu-project.gitlab.io/qemu/interop/vhost-user.html

Signed-off-by: Yajun Wu 
Acked-by: Parav Pandit 
Message-Id: <20221017064452.1226514-3-yaj...@nvidia.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 

> 
> Thanks!
> 
> [1] 
> https://lore.kernel.org/qemu-devel/20230501230409.274178-1-stefa...@redhat.com/
> 


signature.asc
Description: PGP signature

Re: [PATCH 2/4] vhost-user: Interface for migration state transfer

2023-05-09 Thread Eugenio Perez Martin

On Tue, May 9, 2023 at 11:01 AM Hanna Czenczek  wrote:
>
> On 09.05.23 08:31, Eugenio Perez Martin wrote:
> > On Mon, May 8, 2023 at 9:12 PM Stefan Hajnoczi  wrote:
>
> [...]
>
> >> VHOST_USER_GET_VRING_BASE itself isn't really enough because it stops a
> >> specific virtqueue but not the whole device. Unfortunately stopping all
> >> virtqueues is not the same as SUSPEND since spontaneous device activity
> >> is possible independent of any virtqueue (e.g. virtio-scsi events and
> >> maybe virtio-net link status).
> >>
> >> That's why I think SUSPEND is necessary for a solution that's generic
> >> enough to cover all device types.
> >>
> > I agree.
> >
> > In particular virtiofsd is already resetting all the device at
> > VHOST_USER_GET_VRING_BASE if I'm not wrong, so that's even more of a
> > reason to implement suspend call.
>
> Oh, no, just the vring in question.  Not the whole device.
>
> In addition, we still need the GET_VRING_BASE call anyway, because,
> well, we want to restore the vring on the destination via SET_VRING_BASE.
>

Ok, that makes sense, sorry for the confusion!

Thanks!

Re: [PATCH v3 2/3] target/arm: Select CONFIG_ARM_V7M when TCG is enabled

2023-05-09 Thread Paolo Bonzini


On 5/9/23 16:49, Philippe Mathieu-Daudé wrote:

On 8/5/23 20:16, Fabiano Rosas wrote:

We cannot allow this config to be disabled at the moment as not all of
the relevant code is protected by it.

Commit 29d9efca16 ("arm/Kconfig: Do not build TCG-only boards on a
KVM-only build") moved the CONFIGs of several boards to Kconfig, so it
is now possible that nothing selects ARM_V7M (e.g. when doing a
--without-default-devices build).

Return the CONFIG_ARM_V7M entry to a state where it is always selected
whenever TCG is available.

Fixes: 29d9efca16 ("arm/Kconfig: Do not build TCG-only boards on a 
KVM-only build")

Signed-off-by: Fabiano Rosas 
---
  target/arm/Kconfig | 1 +
  1 file changed, 1 insertion(+)

diff --git a/target/arm/Kconfig b/target/arm/Kconfig
index 3fffdcb61b..5947366f6e 100644
--- a/target/arm/Kconfig
+++ b/target/arm/Kconfig
@@ -1,6 +1,7 @@
  config ARM
  bool
  select ARM_COMPATIBLE_SEMIHOSTING if TCG
+    select ARM_V7M if TCG


Probably worth a comment mentioning this is temporarily
required until , so we won't forgot
to remove it.


Yeah, this one should in principle be defined by the boards, but 
m_helper.c is included unconditionally instead of having some kind of 
stub for A-only boards.


Related to this is the (right now unconditional, later on only "if TCG") 
"select ARM_GICV3_TCG" that needs to be added under ARM_GIC.


Paolo

Re: [PATCH v1 1/1] hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0

2023-05-09 Thread Michael S. Tsirkin

On Tue, May 09, 2023 at 07:01:53AM -0700, Peter Xu wrote:
> On Tue, May 02, 2023 at 09:27:02PM -0300, Leonardo Bras wrote:
> > Since it's implementation on v8.0.0-rc0, having the PCI_ERR_UNCOR_MASK
> > set for machine types < 8.0 will cause migration to fail if the target
> > QEMU version is < 8.0.0 :
> > 
> > qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 
> > 40 device: 0 cmask: ff wmask: 0 w1cmask:0
> > qemu-system-x86_64: Failed to load PCIDevice:config
> > qemu-system-x86_64: Failed to load e1000e:parent_obj
> > qemu-system-x86_64: error while loading state for instance 0x0 of device 
> > ':00:02.0/e1000e'
> > qemu-system-x86_64: load of migration failed: Invalid argument
> > 
> > The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0,
> > with this cmdline:
> > 
> > ./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX]
> > 
> > In order to fix this, property x-pcie-err-unc-mask was introduced to
> > control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by
> > default, but is disabled if machine type <= 7.2.
> > 
> > Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register")
> > Suggested-by: Michael S. Tsirkin 
> > Signed-off-by: Leonardo Bras 
> 
> Since this one blocks mostly all backward migration for current master and
> all the downstream trees, shall we consider pick it up sooner?
> 
> And I think we should make sure qemu-stable gets this too, I'm not pretty
> sure how we normally do that and whether we need explicit CC: to stable
> list, perhaps not..
> 
> Thanks!
> 
> -- 
> Peter Xu


Yes will be in the pull I'm working on, targeting end of week.

-- 
MST

Re: [PATCH] target/m68k: Fix gen_load_fp for OS_LONG

2023-05-09 Thread Philippe Mathieu-Daudé


On 8/5/23 16:08, Richard Henderson wrote:

Case was accidentally dropped in b7a94da9550b.

Signed-off-by: Richard Henderson 
---
  target/m68k/translate.c | 1 +
  1 file changed, 1 insertion(+)


Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH] ui/dbus: Implement damage regions for GL

2023-05-09 Thread Philippe Mathieu-Daudé

On 9/5/23 17:04, Bilal Elmoussaoui wrote:

cairo declared as optional dep, ...

I don't see where cairo is marked as an optional dependency, the `cairo 
= not_found` part could probably be dropped to make it clearer.

I was expecting:

.require(cairo.found(), error_message: '-display dbus requires cairo')

On Tue, May 9, 2023 at 4:34 PM Philippe Mathieu-Daudé > wrote:

Hi,

On 9/5/23 13:59, Bilal Elmoussaoui wrote:
 > From: Christian Hergert mailto:cherg...@redhat.com>>

 > diff --git a/ui/dbus-listener.c b/ui/dbus-listener.c
 > index 911acdc529..047be5cb3a 100644
 > --- a/ui/dbus-listener.c
 > +++ b/ui/dbus-listener.c
 > @@ -25,6 +25,7 @@
 >   #include "qemu/error-report.h"
 >   #include "sysemu/sysemu.h"
 >   #include "dbus.h"
 > +#include 

cairo used unconditionally.

Shouldn't we now declared it as a strict dependency in meson?

 >   #include 
 >
 >   #ifdef CONFIG_OPENGL

1 2 3 >

1 - 100 of 286 matches

Mail list logo