[PULL 0/4] ppc-for-5.1 queue 20200720

2020-07-19 Thread David Gibson
The following changes since commit 9fc87111005e8903785db40819af66b8f85b8b96:

  Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20200717' into 
staging (2020-07-19 10:29:05 +0100)

are available in the Git repository at:

  git://github.com/dgibson/qemu.git tags/ppc-for-5.1-20200720

for you to fetch changes up to b25fbd6a1302c0eac5b326be3e1f828e905c0c9a:

  pseries: Update SLOF firmware image (2020-07-20 09:21:39 +1000)


ppc patch queue 20200720

Here are some assorted fixes for qemu-5.1:
 * SLOF update with improved TPM handling, and fix for possible stack
   overflows on many-vcpu machines
 * Fix for NUMA distances on NVLink2 attached GPU memory nodes
 * Fixes to fail more gracefully on attempting to plug unsupported PCI bridge 
types
 * Don't allow pnv-psi device to be user created


Alexey Kardashevskiy (1):
  pseries: Update SLOF firmware image

Greg Kurz (2):
  ppc/pnv: Make PSI device types not user creatable
  spapr_pci: Robustify support of PCI bridges

Reza Arbab (1):
  spapr: Add a new level of NUMA for GPUs

 hw/ppc/pnv_psi.c|   1 +
 hw/ppc/spapr.c  |  21 +++--
 hw/ppc/spapr_pci.c  |  56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/ppc/spapr_pci_nvlink2.c  |  13 +++---
 include/hw/pci-host/spapr.h |   1 +
 include/hw/ppc/spapr.h  |   1 +
 pc-bios/README  |   2 +-
 pc-bios/slof.bin| Bin 965112 -> 968368 bytes
 roms/SLOF   |   2 +-
 9 files changed, 90 insertions(+), 7 deletions(-)



[PULL 3/4] spapr: Add a new level of NUMA for GPUs

2020-07-19 Thread David Gibson
From: Reza Arbab 

NUMA nodes corresponding to GPU memory currently have the same
affinity/distance as normal memory nodes. Add a third NUMA associativity
reference point enabling us to give GPU nodes more distance.

This is guest visible information, which shouldn't change under a
running guest across migration between different qemu versions, so make
the change effective only in new (pseries > 5.0) machine types.

Before, `numactl -H` output in a guest with 4 GPUs (nodes 2-5):

node distances:
node   0   1   2   3   4   5
  0:  10  40  40  40  40  40
  1:  40  10  40  40  40  40
  2:  40  40  10  40  40  40
  3:  40  40  40  10  40  40
  4:  40  40  40  40  10  40
  5:  40  40  40  40  40  10

After:

node distances:
node   0   1   2   3   4   5
  0:  10  40  80  80  80  80
  1:  40  10  80  80  80  80
  2:  80  80  10  80  80  80
  3:  80  80  80  10  80  80
  4:  80  80  80  80  10  80
  5:  80  80  80  80  80  10

These are the same distances as on the host, mirroring the change made
to host firmware in skiboot commit f845a648b8cb ("numa/associativity:
Add a new level of NUMA for GPU's").

Signed-off-by: Reza Arbab 
Message-Id: <20200716225655.24289-1-ar...@linux.ibm.com>
Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c  | 21 +++--
 hw/ppc/spapr_pci.c  |  2 ++
 hw/ppc/spapr_pci_nvlink2.c  | 13 ++---
 include/hw/pci-host/spapr.h |  1 +
 include/hw/ppc/spapr.h  |  1 +
 5 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 299908cc73..0ae293ec94 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -890,10 +890,16 @@ static int spapr_dt_rng(void *fdt)
 static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
 {
 MachineState *ms = MACHINE(spapr);
+SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms);
 int rtas;
 GString *hypertas = g_string_sized_new(256);
 GString *qemu_hypertas = g_string_sized_new(256);
-uint32_t refpoints[] = { cpu_to_be32(0x4), cpu_to_be32(0x4) };
+uint32_t refpoints[] = {
+cpu_to_be32(0x4),
+cpu_to_be32(0x4),
+cpu_to_be32(0x2),
+};
+uint32_t nr_refpoints = ARRAY_SIZE(refpoints);
 uint64_t max_device_addr = MACHINE(spapr)->device_memory->base +
 memory_region_size(&MACHINE(spapr)->device_memory->mr);
 uint32_t lrdr_capacity[] = {
@@ -945,8 +951,12 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void 
*fdt)
  qemu_hypertas->str, qemu_hypertas->len));
 g_string_free(qemu_hypertas, TRUE);
 
+if (smc->pre_5_1_assoc_refpoints) {
+nr_refpoints = 2;
+}
+
 _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
- refpoints, sizeof(refpoints)));
+ refpoints, nr_refpoints * sizeof(refpoints[0])));
 
 _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
  maxdomains, sizeof(maxdomains)));
@@ -4584,9 +4594,16 @@ DEFINE_SPAPR_MACHINE(5_1, "5.1", true);
  */
 static void spapr_machine_5_0_class_options(MachineClass *mc)
 {
+SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+static GlobalProperty compat[] = {
+{ TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-5.1-associativity", "on" },
+};
+
 spapr_machine_5_1_class_options(mc);
 compat_props_add(mc->compat_props, hw_compat_5_0, hw_compat_5_0_len);
+compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
 mc->numa_mem_supported = true;
+smc->pre_5_1_assoc_refpoints = true;
 }
 
 DEFINE_SPAPR_MACHINE(5_0, "5.0", false);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 21681215d4..363cdb3f7b 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -2089,6 +2089,8 @@ static Property spapr_phb_properties[] = {
  pcie_ecs, true),
 DEFINE_PROP_UINT64("gpa", SpaprPhbState, nv2_gpa_win_addr, 0),
 DEFINE_PROP_UINT64("atsd", SpaprPhbState, nv2_atsd_win_addr, 0),
+DEFINE_PROP_BOOL("pre-5.1-associativity", SpaprPhbState,
+ pre_5_1_assoc, false),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c
index dd8cd6db96..76ae77ebc8 100644
--- a/hw/ppc/spapr_pci_nvlink2.c
+++ b/hw/ppc/spapr_pci_nvlink2.c
@@ -362,9 +362,9 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, 
void *fdt)
 &error_abort);
 uint32_t associativity[] = {
 cpu_to_be32(0x4),
-SPAPR_GPU_NUMA_ID,
-SPAPR_GPU_NUMA_ID,
-SPAPR_GPU_NUMA_ID,
+cpu_to_be32(nvslot->numa_id),
+cpu_to_be32(nvslot->numa_id),
+cpu_to_be32(nvslot->numa_id),
 cpu_to_be32(nvslot->numa_id)
 };
 uint64_t size = object_property_get_uint(nv_mrobj, "size", NULL);
@@ -375,6 +375,13 @@ void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, 
void *fdt)
 _FDT(off);
 _FDT((fdt_setprop_string(fdt, 

[PULL 2/4] spapr_pci: Robustify support of PCI bridges

2020-07-19 Thread David Gibson
From: Greg Kurz 

Some recent error handling cleanups unveiled issues with our support of
PCI bridges:

1) QEMU aborts when using non-standard PCI bridge types,
   unveiled by commit 7ef1553dac "spapr_pci: Drop some dead error handling"

$ qemu-system-ppc64 -M pseries -device pcie-pci-bridge
Unexpected error in object_property_find() at qom/object.c:1240:
qemu-system-ppc64: -device pcie-pci-bridge: Property '.chassis_nr' not found
Aborted (core dumped)

This happens because we assume all PCI bridge types have a "chassis_nr"
property. Actually, this property only exists for the standard PCI bridge
type "pci-bridge". We could possibly revert 7ef1553dac, but it seems
much simpler to check for the presence of "chassis_nr" earlier.

2) QEMU aborts if the same "chassis_nr" value is used several times,
   unveiled by commit d2623129a7de "qom: Drop parameter @errp of
   object_property_add() & friends"

$ qemu-system-ppc64 -M pseries -device pci-bridge,chassis_nr=1 \
-device pci-bridge,chassis_nr=1
Unexpected error in object_property_try_add() at qom/object.c:1167:
qemu-system-ppc64: -device pci-bridge,chassis_nr=1: attempt to add duplicate 
property '4100' to object (type 'container')
Aborted (core dumped)

This happens because we assume that "chassis_nr" values are unique, but
nobody enforces that and we end up generating duplicate DRC ids. The PCI
code doesn't really care for duplicate "chassis_nr" properties since it
is only used to initialize the "Chassis Number Register" of the bridge,
with no functional impact on QEMU. So, even if passing the same value
several times might look weird, it never broke anything before, so
I guess we don't necessarily want to enforce strict checking in the PCI
code now.

Work around both issues in the PAPR code: check that the bridge has a
unique and non-null "chassis_nr" when plugging it into its parent bus.
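
With this check in place, both command lines above should fail gracefully
instead of aborting; a hedged sketch of the expected behaviour, based only on
the error messages added by this patch (exact wording of the duplicate case
depends on the chassis number used):

$ qemu-system-ppc64 -M pseries -device pcie-pci-bridge
qemu-system-ppc64: -device pcie-pci-bridge: PCI Bridge lacks a "chassis_nr" property
Try -device pci-bridge instead.

$ qemu-system-ppc64 -M pseries -device pci-bridge,chassis_nr=1 \
-device pci-bridge,chassis_nr=1
qemu-system-ppc64: -device pci-bridge,chassis_nr=1: Bridge chassis 1 already in use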

Fixes: 05929a6c5dfe ("spapr: Don't use bus number for building DRC ids")
Fixes: 7ef1553dac ("spapr_pci: Drop some dead error handling")
Fixes: d2623129a7de ("qom: Drop parameter @errp of object_property_add() & 
friends")
Reported-by: Thomas Huth 
Signed-off-by: Greg Kurz 
Message-Id: <159431476748.407044.16711294833569014964.st...@bahia.lan>
[dwg: Move check slightly to a better place]
Signed-off-by: David Gibson 
---
 hw/ppc/spapr_pci.c | 54 ++
 1 file changed, 54 insertions(+)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 2a6a48744a..21681215d4 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1480,6 +1480,57 @@ static void spapr_pci_bridge_plug(SpaprPhbState *phb,
 add_drcs(phb, bus);
 }
 
+/* Returns non-zero if the value of "chassis_nr" is already in use */
+static int check_chassis_nr(Object *obj, void *opaque)
+{
+int new_chassis_nr =
+object_property_get_uint(opaque, "chassis_nr", &error_abort);
+int chassis_nr =
+object_property_get_uint(obj, "chassis_nr", NULL);
+
+if (!object_dynamic_cast(obj, TYPE_PCI_BRIDGE)) {
+return 0;
+}
+
+/* Skip unsupported bridge types */
+if (!chassis_nr) {
+return 0;
+}
+
+/* Skip self */
+if (obj == opaque) {
+return 0;
+}
+
+return chassis_nr == new_chassis_nr;
+}
+
+static bool bridge_has_valid_chassis_nr(Object *bridge, Error **errp)
+{
+int chassis_nr =
+object_property_get_uint(bridge, "chassis_nr", NULL);
+
+/*
+ * slotid_cap_init() already ensures that "chassis_nr" isn't null for
+ * standard PCI bridges, so this really tells if "chassis_nr" is present
+ * or not.
+ */
+if (!chassis_nr) {
+error_setg(errp, "PCI Bridge lacks a \"chassis_nr\" property");
+error_append_hint(errp, "Try -device pci-bridge instead.\n");
+return false;
+}
+
+/* We want unique values for "chassis_nr" */
+if (object_child_foreach_recursive(object_get_root(), check_chassis_nr,
+   bridge)) {
+error_setg(errp, "Bridge chassis %d already in use", chassis_nr);
+return false;
+}
+
+return true;
+}
+
 static void spapr_pci_plug(HotplugHandler *plug_handler,
DeviceState *plugged_dev, Error **errp)
 {
@@ -1508,6 +1559,9 @@ static void spapr_pci_plug(HotplugHandler *plug_handler,
 g_assert(drc);
 
 if (pc->is_bridge) {
+if (!bridge_has_valid_chassis_nr(OBJECT(plugged_dev), errp)) {
+return;
+}
 spapr_pci_bridge_plug(phb, PCI_BRIDGE(plugged_dev));
 }
 
-- 
2.26.2




[PULL 1/4] ppc/pnv: Make PSI device types not user creatable

2020-07-19 Thread David Gibson
From: Greg Kurz 

QEMU aborts with -device pnv-psi-POWER8:

$ qemu-system-ppc64 -device pnv-psi-POWER8
qemu-system-ppc64: hw/intc/xics.c:605: ics_realize: Assertion
`ics->xics' failed.
Aborted (core dumped)

The Processor Service Interface Controller is an internal device.
It should only be instantiated by the chip, which takes care of
configuring the link required by the ICS object in the case of
POWER8. It doesn't make sense for a user to specify it on the
command line.

Note that the PSI model for POWER8 was added 3 years ago but the
devices weren't available on the command line because of a bug
that was fixed by recent commit 2f35254aa0 ("pnv/psi: Correct
the pnv-psi* devices not to be sysbus devices").

Fixes: 54f59d786c ("ppc/pnv: Add cut down PSI bridge model and hookup external 
interrupt")
Reported-by: Thomas Huth 
Signed-off-by: Greg Kurz 
Message-Id: <159413975752.169116.5808968580649255382.st...@bahia.lan>
Signed-off-by: David Gibson 
---
 hw/ppc/pnv_psi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c
index 5bdeec700e..6a479cac53 100644
--- a/hw/ppc/pnv_psi.c
+++ b/hw/ppc/pnv_psi.c
@@ -929,6 +929,7 @@ static void pnv_psi_class_init(ObjectClass *klass, void 
*data)
 dc->desc = "PowerNV PSI Controller";
 device_class_set_props(dc, pnv_psi_properties);
 dc->reset = pnv_psi_reset;
+dc->user_creatable = false;
 }
 
 static const TypeInfo pnv_psi_info = {
-- 
2.26.2




Re: [PATCH] e1000e: using bottom half to send packets

2020-07-19 Thread Li Qiang
Jason Wang wrote on Monday, July 20, 2020 at 12:00 PM:
>
>
> On 2020/7/17 11:46 PM, Li Qiang wrote:
> > Jason Wang wrote on Friday, July 17, 2020 at 1:39 PM:
> >>
> >> On 2020/7/17 12:46 PM, Li Qiang wrote:
> >>> Jason Wang wrote on Friday, July 17, 2020 at 11:10 AM:
>  On 2020/7/17 12:14 AM, Li Qiang wrote:
> > Alexander Bulekov reported a UAF bug related to e1000e packet send.
> >
> > -->https://bugs.launchpad.net/qemu/+bug/1886362
> >
> > This is because the guest triggers an e1000e packet send and sets the
> > data's address to e1000e's MMIO address. So when the e1000e does DMA
> > it will write the MMIO again, trigger re-entrancy and finally
> > cause this UAF.
> >
> > Paolo suggested to use a bottom half whenever MMIO is doing complicated
> > things in here:
> > -->https://lists.nongnu.org/archive/html/qemu-devel/2020-07/msg03342.html
> >
> > Reference here:
> > 'The easiest solution is to delay processing of descriptors to a bottom
> > half whenever MMIO is doing something complicated.  This is also better
> > for latency because it will free the vCPU thread more quickly and leave
> > the work to the I/O thread.'
>  I think several things were missed in this patch (take virtio-net as a
>  reference), do we need the following things:
> 
> >>> Thanks Jason,
> >>> In fact I know this, I'm scared for touching this but I want to try.
> >>> Thanks for your advice.
> >>>
>  - Cancel the bh when VM is stopped.
> >>> Ok. I think adding a vm state change notifier for e1000e can address this.
> >>>
>  - A throttle to prevent the bh from executing for too much time?
> >>> Ok, I think adding a config timeout and a timer in e1000e can address
> >>> this.
> >>
> >> Sorry, a typo. I meant we probably need a tx_burst like what virtio-net did.
> >>
> >>
>  - A flag to record whether or not there is a pending tx (and migrate it?)
> >>> Is just a flag enough? Could you explain more about the idea behind
> >>> processing the virtio-net/e1000e using bh like this?
> >>
> >> Virtio-net uses a tx_waiting variable to record whether or not there's a
> >> pending bh. (E.g. the bh is cancelled due to vmstop, and we need to
> >> reschedule it after vmresume.) Maybe we can do something simpler by just
> >> scheduling the bh unconditionally during vm resuming.
> >>
> >>
> >>> For example, if the guest triggers a lot of packet sends and the bh
> >>> is scheduled in the IO thread, will we lose packets?
> >>
> >> We don't since we don't populate virtqueue which means packets are
> >> queued there.
> >>
> > This reminds me of a question:
> > If we use tx_burst like in virtio-net, in detail:
> > if we send out 'tx_burst' packets per bh, then we set 'tx_waiting' and
> > then schedule another bh. However, if between two bh schedules the guest
> > changes the e1000e registers such as 'r->dh' 'r->dlen', the data is fully
> > corrupted. In fact this issue already exists in my original patch. What
> > if the following happened:
> >
> > vcpu thread: guest writes e1000e MMIO to trigger a packet send
> > vcpu thread: schedules a bh
> > vcpu thread: returns
> > IO thread: begins to run the bh and starts sending packets
> > vcpu thread: writes registers again such as 'r->dh' 'r->dlen'..
> >
> > So here the IO thread and vcpu thread will race on the registers?
> >
> > If I remember correctly, virtio-net has no such problem because it
> > uses a ring buffer
> > and the backend (virtio device) uses a shadow index to index the ring
> > buffer data.
> >
> > What's your idea here?
>
>
> I think we serialize them through bql? (qemu_mutex_lock_iothread())
>

Ok, I will try to cook a patch tonight based on our discussion.

Thanks,
Li Qiang

> Thanks
>
>
> >
> > Thanks,
> > Li Qiang
> >
> >
> >> Thanks
> >>
> >>
> >>> How do we avoid this in virtio-net?
> >>>
> >>> Thanks,
> >>> Li Qiang
> >>>
> >>>
> >>>
>  Thanks
> 
> 
> > This patch fixes this UAF.
> >
> > Signed-off-by: Li Qiang 
> > ---
> > hw/net/e1000e_core.c | 25 +
> > hw/net/e1000e_core.h |  2 ++
> > 2 files changed, 19 insertions(+), 8 deletions(-)
> >
> > diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
> > index bcd186cac5..6165b04b68 100644
> > --- a/hw/net/e1000e_core.c
> > +++ b/hw/net/e1000e_core.c
> > @@ -2423,32 +2423,27 @@ e1000e_set_dbal(E1000ECore *core, int index, 
> > uint32_t val)
> > static void
> > e1000e_set_tctl(E1000ECore *core, int index, uint32_t val)
> > {
> > -E1000E_TxRing txr;
> > core->mac[index] = val;
> >
> > if (core->mac[TARC0] & E1000_TARC_ENABLE) {
> > -e1000e_tx_ring_init(core, &txr, 0);
> > -e1000e_start_xmit(core, &txr);
> > +qemu_bh_schedule(core->tx[0].tx_bh);
> > }
> >
> > if (core->mac[TARC1] & E1000_TARC_ENABLE) {
> > -e1000e_tx_ring_init(core, &txr, 1);
> > -e1000e_start_xmit(core, &txr);
> > +

Re: [RFC v2 1/1] memory: Delete assertion in memory_region_unregister_iommu_notifier

2020-07-19 Thread Jason Wang



On 2020/7/17 10:18 PM, Peter Xu wrote:

On Thu, Jul 16, 2020 at 10:54:31AM +0800, Jason Wang wrote:

On 2020/7/16 9:00 AM, Peter Xu wrote:

On Mon, Jul 13, 2020 at 12:04:16PM +0800, Jason Wang wrote:

On 2020/7/10 9:30 PM, Peter Xu wrote:

On Fri, Jul 10, 2020 at 02:34:11PM +0800, Jason Wang wrote:

On 2020/7/9 10:10 PM, Peter Xu wrote:

On Thu, Jul 09, 2020 at 01:58:33PM +0800, Jason Wang wrote:

- If we care about performance, it's better to implement the MAP event for
vhost, otherwise there could be a lot of IOTLB misses

I feel like these are two things.

So far what we are talking about is whether vt-d should have knowledge about
what kind of events one iommu notifier is interested in.  I still think we
should keep this as answered in question 1.

The other question is whether we want to switch vhost from UNMAP to MAP/UNMAP
events even without vDMA, so that vhost can establish the mapping even before
IO starts.  IMHO it's doable, but only if the guest runs DPDK workloads.  When
the guest is using dynamic iommu page mappings, I feel like that can be even
slower, because then the worst case is for each IO we'll need to vmexit twice:

   - The first vmexit caused by an invalidation to MAP the page tables, so 
vhost
 will setup the page table before IO starts

   - IO/DMA triggers and completes

   - The second vmexit caused by another invalidation to UNMAP the page 
tables

So it seems to be worse than when vhost only uses UNMAP like right now.  At
least we only have one vmexit (when UNMAP).  We'll have a vhost translate()
request from kernel to userspace, but IMHO that's cheaper than the vmexit.

Right but then I would still prefer to have another notifier.

vtd_page_walk has nothing to do with the device IOTLB; the IOMMU has a
dedicated command for flushing the device IOTLB. But the check for
vtd_as_has_map_notifier is used to skip devices which can do demand
paging via ATS or a device-specific way. If we have two different notifiers,
vhost will be on the device IOTLB notifier, so we don't need it at all?

But we can still have iommu notifier that only registers to UNMAP even after we
introduce dev-iotlb notifier?  We don't want to do page walk for them as well.
TCG should be the only one so far, but I don't know.. maybe there can still be
new ones?

I think you're right. But looking at the code, it looks like the check of
vtd_as_has_map_notifier() is only used in:

1) vtd_iommu_replay()
2) vtd_iotlb_page_invalidate_notify() (PSI)

For the replay, it's expensive anyhow. For PSI, I think it's just about one
or a few mappings; I'm not sure it will have an obvious performance impact.

And I had two questions:

1) The code doesn't check MAP for DSI or GI; does this match what the spec
says? (It looks to me like the spec is unclear in this part)

Both DSI/GI should cover maps too?  E.g. vtd_sync_shadow_page_table() in
vtd_iotlb_domain_invalidate().

I meant the code doesn't check whether there's a MAP notifier :)

It's actually checked, because it loops over vtd_as_with_notifiers, and only
MAP notifiers register to that. :)


I may be missing something, but I don't find the code that blocks UNMAP notifiers?

vhost_iommu_region_add()
     memory_region_register_iommu_notifier()
         memory_region_update_iommu_notify_flags()
             imrc->notify_flag_changed()
                 vtd_iommu_notify_flag_changed()

?

Yeah I think you're right.  I might have confused it with some previous
implementations.  Maybe we should also do a similar thing for DSI/GI, just like
what we do in PSI.



Ok.





2) For the replay() I don't see other implementations (either spapr or the
generic one) that do unmap (actually they skip unmap explicitly); any
reason for doing this in the Intel IOMMU?

I could be wrong, but I'd guess it's because vt-d implemented the caching mode
by leveraging the same invalidation structure, so it's harder to make all
things right (IOW, we can't clearly distinguish MAP from UNMAP when we receive
an invalidation request, because MAP/UNMAP requests look the same).

I didn't check others, but I believe spapr is doing it differently by using
some hypercalls to deliver IOMMU map/unmap requests, which seems a bit close to
what virtio-iommu is doing.  Anyway, the point is if we have explicit MAP/UNMAP
from the guest, logically the replay indeed does not need to do any unmap
because we don't need to call replay() on an already existing device but only
for e.g. hot plug.

But this looks to conflict with what memory_region_iommu_replay() does: for an
IOMMU that doesn't have a replay method, it skips UNMAP requests:

      for (addr = 0; addr < memory_region_size(mr); addr += granularity) {
      iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, n->iommu_idx);
      if (iotlb.perm != IOMMU_NONE) {
      n->notify(n, &iotlb);
      }

I guess this generic code has no knowledge of whether the guest has explicit
MAP/UNMAP. Or does replay imply that the guest doesn't have explicit
MAP/UNMAP?

I think it matches exactly with a 

Re: [PATCH] e1000e: using bottom half to send packets

2020-07-19 Thread Jason Wang



On 2020/7/17 11:52 PM, Peter Maydell wrote:

On Fri, 17 Jul 2020 at 04:11, Jason Wang  wrote:

I think several things were missed in this patch (take virtio-net as a
reference), do we need the following things:

- Cancel the bh when VM is stopped.

Similarly, what should we do with the bh when the device
is reset ?



I think we need to cancel the bh.

Thanks





- A throttle to prevent the bh from executing for too much time?
- A flag to record whether or not there is a pending tx (and migrate it?)

thanks
-- PMM
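
For reference, a minimal sketch of how the bh could be parked across VM
stop/reset and rescheduled on resume, roughly following what virtio-net does.
This is an illustration only; the E1000E_NUM_QUEUES macro and the tx[i].tx_bh
field are assumptions about the in-progress e1000e patch, not committed code:

/*
 * Hypothetical sketch: cancel the tx bottom halves while the VM is not
 * running (and from the device reset path), and reschedule them
 * unconditionally on resume so pending tx work is not lost.
 */
static void e1000e_tx_bh_vm_state_change(void *opaque, int running,
                                         RunState state)
{
    E1000ECore *core = opaque;
    int i;

    for (i = 0; i < E1000E_NUM_QUEUES; i++) {
        if (running) {
            qemu_bh_schedule(core->tx[i].tx_bh);
        } else {
            qemu_bh_cancel(core->tx[i].tx_bh);
        }
    }
}

/*
 * Registered once at realize time, e.g.:
 *   qemu_add_vm_change_state_handler(e1000e_tx_bh_vm_state_change, core);
 * The same qemu_bh_cancel() loop would also run from the device reset handler.
 */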






Re: [PATCH] e1000e: using bottom half to send packets

2020-07-19 Thread Jason Wang



On 2020/7/17 11:46 PM, Li Qiang wrote:

Jason Wang wrote on Friday, July 17, 2020 at 1:39 PM:


On 2020/7/17 12:46 PM, Li Qiang wrote:

Jason Wang wrote on Friday, July 17, 2020 at 11:10 AM:

On 2020/7/17 12:14 AM, Li Qiang wrote:

Alexander Bulekov reported a UAF bug related to e1000e packet send.

-->https://bugs.launchpad.net/qemu/+bug/1886362

This is because the guest triggers an e1000e packet send and sets the
data's address to e1000e's MMIO address. So when the e1000e does DMA
it will write the MMIO again, trigger re-entrancy and finally
cause this UAF.

Paolo suggested to use a bottom half whenever MMIO is doing complicated
things in here:
-->https://lists.nongnu.org/archive/html/qemu-devel/2020-07/msg03342.html

Reference here:
'The easiest solution is to delay processing of descriptors to a bottom
half whenever MMIO is doing something complicated.  This is also better
for latency because it will free the vCPU thread more quickly and leave
the work to the I/O thread.'

I think several things were missed in this patch (take virtio-net as a
reference), do we need the following things:


Thanks Jason,
In fact I know this; I'm scared of touching this but I want to try.
Thanks for your advice.


- Cancel the bh when VM is stopped.

Ok. I think adding a vm state change notifier for e1000e can address this.


- A throttle to prevent the bh from executing for too much time?

Ok, I think adding a config timeout and a timer in e1000e can address this.


Sorry, a typo. I meant we probably need a tx_burst like what virtio-net did.



- A flag to record whether or not there is a pending tx (and migrate it?)

Is just a flag enough? Could you explain more about the idea behind
processing the virtio-net/e1000e using bh like this?


Virtio-net uses a tx_waiting variable to record whether or not there's a
pending bh. (E.g. the bh is cancelled due to vmstop, and we need to
reschedule it after vmresume.) Maybe we can do something simpler by just
scheduling the bh unconditionally during vm resuming.



For example, if the guest triggers a lot of packet sends and the bh
is scheduled in the IO thread, will we lose packets?


We don't since we don't populate virtqueue which means packets are
queued there.


This reminds me of a question:
If we use tx_burst like in virtio-net, in detail:
if we send out 'tx_burst' packets per bh, then we set 'tx_waiting' and
then schedule another bh. However, if between two bh schedules the guest changes
the e1000e registers such as 'r->dh' 'r->dlen', the data is fully corrupted.
In fact this issue already exists in my original patch.
What if the following happened:

vcpu thread: guest writes e1000e MMIO to trigger a packet send
vcpu thread: schedules a bh
vcpu thread: returns
IO thread: begins to run the bh and starts sending packets
vcpu thread: writes registers again such as 'r->dh' 'r->dlen'..

So here the IO thread and vcpu thread will race on the registers?

If I remember correctly, virtio-net has no such problem because it
uses a ring buffer
and the backend (virtio device) uses a shadow index to index the ring
buffer data.

What's your idea here?



I think we serialize them through bql? (qemu_mutex_lock_iothread())

Thanks




Thanks,
Li Qiang



Thanks



How do we avoid this in virtio-net?

Thanks,
Li Qiang




Thanks



This patch fixes this UAF.

Signed-off-by: Li Qiang 
---
hw/net/e1000e_core.c | 25 +
hw/net/e1000e_core.h |  2 ++
2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index bcd186cac5..6165b04b68 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -2423,32 +2423,27 @@ e1000e_set_dbal(E1000ECore *core, int index, uint32_t 
val)
static void
e1000e_set_tctl(E1000ECore *core, int index, uint32_t val)
{
-E1000E_TxRing txr;
core->mac[index] = val;

if (core->mac[TARC0] & E1000_TARC_ENABLE) {
-e1000e_tx_ring_init(core, &txr, 0);
-e1000e_start_xmit(core, &txr);
+qemu_bh_schedule(core->tx[0].tx_bh);
}

if (core->mac[TARC1] & E1000_TARC_ENABLE) {
-e1000e_tx_ring_init(core, &txr, 1);
-e1000e_start_xmit(core, &txr);
+qemu_bh_schedule(core->tx[1].tx_bh);
}
}

static void
e1000e_set_tdt(E1000ECore *core, int index, uint32_t val)
{
-E1000E_TxRing txr;
int qidx = e1000e_mq_queue_idx(TDT, index);
uint32_t tarc_reg = (qidx == 0) ? TARC0 : TARC1;

core->mac[index] = val & 0xffff;

if (core->mac[tarc_reg] & E1000_TARC_ENABLE) {
-e1000e_tx_ring_init(core, &txr, qidx);
-e1000e_start_xmit(core, &txr);
+qemu_bh_schedule(core->tx[qidx].tx_bh);
}
}

@@ -3322,6 +3317,16 @@ e1000e_vm_state_change(void *opaque, int running, 
RunState state)
}
}

+static void e1000e_core_tx_bh(void *opaque)
+{
+struct e1000e_tx *tx = opaque;
+E1000ECore *core = tx->core;
+E1000E_TxRing txr;
+
+e1000e_tx_ring_init(core, &txr, tx - &core->tx[0]);
+e1000e_start_xmit(core, &txr);
+}
+
void

Re: device compatibility interface for live migration with assigned devices

2020-07-19 Thread Jason Wang



On 2020/7/18 12:12 AM, Alex Williamson wrote:

On Thu, 16 Jul 2020 16:32:30 +0800
Yan Zhao  wrote:


On Thu, Jul 16, 2020 at 12:16:26PM +0800, Jason Wang wrote:

On 2020/7/14 7:29 AM, Yan Zhao wrote:

hi folks,
we are defining a device migration compatibility interface that helps upper
layer stack like openstack/ovirt/libvirt to check if two devices are
live migration compatible.
The "devices" here could be MDEVs, physical devices, or hybrid of the two.
e.g. we could use it to check whether
- a src MDEV can migrate to a target MDEV,
- a src VF in SRIOV can migrate to a target VF in SRIOV,
- a src MDEV can migration to a target VF in SRIOV.
(e.g. SIOV/SRIOV backward compatibility case)

The upper layer stack could use this interface as the last step to check
if one device is able to migrate to another device before triggering a real
live migration procedure.
we are not sure if this interface is of value or help to you. please don't
hesitate to drop your valuable comments.


(1) interface definition
The interface is defined in below way:

                 __  userspace
                  /\         \
                 /  \ write
                / read \
      _________/___    ___\|/_________
     | migration_version |  | migration_version |  --> check migration
      -------------------    -------------------       compatibility
           device A               device B


a device attribute named migration_version is defined under each device's
sysfs node. e.g. 
(/sys/bus/pci/devices/\:00\:02.0/$mdev_UUID/migration_version).
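
For illustration, the intended flow would look roughly like this (a hedged
sketch; the paths are placeholders and it assumes that writing an
incompatible version string makes the write fail):

ver=$(cat /sys/bus/pci/devices/<src BDF>/<mdev UUID>/migration_version)
echo "$ver" > /sys/bus/pci/devices/<dst BDF>/<mdev UUID>/migration_version \
&& echo "compatible" || echo "not compatible"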


Are you aware of the devlink-based device management interface that is
proposed upstream? I think it has many advantages over sysfs; would you
consider switching to that?


Advantages, such as?



My understanding of the advantages of devlink (netlink) over sysfs (some were
mentioned at the time of the vDPA sysfs mgmt API discussion) is:


- existing users (NIC, crypto, SCSI, ib), mature and stable
- much better error reporting (ext_ack other than string or errno)
- namespace aware
- do not couple with kobject

Thanks




Re: [PATCH] net: check payload length limit for all frames

2020-07-19 Thread Alexander Bulekov
On 200720 0754, P J P wrote:
> +-- On Fri, 17 Jul 2020, Li Qiang wrote --+
> | P J P wrote on Friday, July 17, 2020 at 5:09 PM:
> | > @Alex, would it be possible to share the reproducer on the upstream bug
> | > LP#1886362?
> | 
> | Maybe you mean the reproducer of your patch?
> 
> Yes.
> 
> | If you or Alex could share it, I'm glad to analysis this issue.
> 
> @Alex ...?

Happy to share it, as long as it is fine with qemu-security:
I reduced it further, from the original that I sent:

cat << EOF | ./i386-softmmu/qemu-system-i386 -M pc-q35-5.0 \
-netdev user,id=qtest-bn0 -device e1000e,netdev=qtest-bn0 \
-display none -nodefaults -nographic -qtest stdio
outl 0xcf8 0x8810
outl 0xcfc 0xe000
outl 0xcf8 0x8804
outw 0xcfc 0x7
write 0xe758 0x4 0xf1ff
write 0xe760 0x6 0xdf00
write 0xe768 0x4 0x0ef1
write 0xe0005008 0x4 0x1827
write 0xec 0x1 0x66
write 0xe03320 0x1 0xff
write 0xe03620 0x1 0xff
write 0xe0f3 0x1 0xdf
write 0xe100 0x6 0xdfdf
write 0xe110 0x5 0xdfdf00
write 0xe11a 0x3 0xff
write 0xe128 0x5 0x7e00ff
write 0xe403 0x1 0xdf
write 0xe420 0x4 0xdfdf
write 0xe42a 0x3 0xff
write 0xe438 0x1 0x7e
EOF

Here are the ASAN Stacktraces:

=
==7909==ERROR: AddressSanitizer: heap-use-after-free on address 0x60632f18 
at pc 0x561dd4409b96 bp 0x7ffdfe983030 sp 0x7ffdfe983028
READ of size 8 at 0x60632f18 thread T0
#0 0x561dd4409b95 in e1000e_write_packet_to_guest 
/home/alxndr/Development/qemu/hw/net/e1000e_core.c:1587:41
#1 0x561dd4403fa2 in e1000e_receive_iov 
/home/alxndr/Development/qemu/hw/net/e1000e_core.c:1709:9
#2 0x561dd43fd91a in e1000e_nc_receive_iov 
/home/alxndr/Development/qemu/hw/net/e1000e.c:213:12
#3 0x561dd43c82e7 in net_tx_pkt_sendv 
/home/alxndr/Development/qemu/hw/net/net_tx_pkt.c:553:9
#4 0x561dd43c65e6 in net_tx_pkt_send 
/home/alxndr/Development/qemu/hw/net/net_tx_pkt.c:629:9
#5 0x561dd43c9c78 in net_tx_pkt_send_loopback 
/home/alxndr/Development/qemu/hw/net/net_tx_pkt.c:642:11
#6 0x561dd4472cf6 in e1000e_tx_pkt_send 
/home/alxndr/Development/qemu/hw/net/e1000e_core.c:664:16
#7 0x561dd446f296 in e1000e_process_tx_desc 
/home/alxndr/Development/qemu/hw/net/e1000e_core.c:743:17
#8 0x561dd446ce68 in e1000e_start_xmit 
/home/alxndr/Development/qemu/hw/net/e1000e_core.c:934:9
#9 0x561dd445635d in e1000e_set_tdt 
/home/alxndr/Development/qemu/hw/net/e1000e_core.c:2451:9
#10 0x561dd440f19e in e1000e_core_write 
/home/alxndr/Development/qemu/hw/net/e1000e_core.c:3265:9
#11 0x561dd43f77b7 in e1000e_mmio_write 
/home/alxndr/Development/qemu/hw/net/e1000e.c:109:5
#12 0x561dd2ff62a3 in memory_region_write_accessor 
/home/alxndr/Development/qemu/softmmu/memory.c:483:5
#13 0x561dd2ff5747 in access_with_adjusted_size 
/home/alxndr/Development/qemu/softmmu/memory.c:544:18
#14 0x561dd2ff3366 in memory_region_dispatch_write 
/home/alxndr/Development/qemu/softmmu/memory.c:1465:16
#15 0x561dd23a5476 in flatview_write_continue 
/home/alxndr/Development/qemu/exec.c:3176:23
#16 0x561dd238de86 in flatview_write 
/home/alxndr/Development/qemu/exec.c:3216:14
#17 0x561dd238d9a7 in address_space_write 
/home/alxndr/Development/qemu/exec.c:3307:18
#18 0x561dd30a43b1 in qtest_process_command 
/home/alxndr/Development/qemu/softmmu/qtest.c:567:9
#19 0x561dd3094b38 in qtest_process_inbuf 
/home/alxndr/Development/qemu/softmmu/qtest.c:710:9
#20 0x561dd30937c5 in qtest_read 
/home/alxndr/Development/qemu/softmmu/qtest.c:722:5
#21 0x561dd5f33993 in qemu_chr_be_write_impl 
/home/alxndr/Development/qemu/chardev/char.c:188:9
#22 0x561dd5f33b17 in qemu_chr_be_write 
/home/alxndr/Development/qemu/chardev/char.c:200:9
#23 0x561dd5f47e03 in fd_chr_read 
/home/alxndr/Development/qemu/chardev/char-fd.c:68:9
#24 0x561dd609c1c4 in qio_channel_fd_source_dispatch 
/home/alxndr/Development/qemu/io/channel-watch.c:84:12
#25 0x7f58c0237897 in g_main_context_dispatch 
(/usr/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x4e897)
#26 0x561dd6495f2b in glib_pollfds_poll 
/home/alxndr/Development/qemu/util/main-loop.c:217:9
#27 0x561dd649365b in os_host_main_loop_wait 
/home/alxndr/Development/qemu/util/main-loop.c:240:5
#28 0x561dd6492ff4 in main_loop_wait 
/home/alxndr/Development/qemu/util/main-loop.c:516:11
#29 0x561dd30b4e00 in qemu_main_loop 
/home/alxndr/Development/qemu/softmmu/vl.c:1676:9
#30 0x561dd60d3fb1 in main /home/alxndr/Development/qemu/softmmu/main.c:49:5
#31 0x7f58bedbde0a in __libc_start_main 
/build/glibc-GwnBeO/glibc-2.30/csu/../csu/libc-start.c:308:16
#32 0x561dd2298919 in _start 
(/home/alxndr/Development/qemu/build-asan/i386-softmmu/qemu-system-i386+0x2b3d919)

0x60632f18 is located 24 bytes inside of 64-byte region 
[0x60632f00,0x60632f40)
freed by thread T0 here:
#0 0x561dd231108d in free 

[PATCH] hw/i386/kvm/ioapic.c: fix typo in error message

2020-07-19 Thread Kenta Ishiguro
Fix a typo in an error message for KVM_SET_IRQCHIP ioctl:
"KVM_GET_IRQCHIP" should be "KVM_SET_IRQCHIP".

Signed-off-by: Kenta Ishiguro 
---
 hw/i386/kvm/ioapic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i386/kvm/ioapic.c b/hw/i386/kvm/ioapic.c
index 4ba8e47251..c5528df942 100644
--- a/hw/i386/kvm/ioapic.c
+++ b/hw/i386/kvm/ioapic.c
@@ -97,7 +97,7 @@ static void kvm_ioapic_put(IOAPICCommonState *s)
 
 ret = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, &chip);
 if (ret < 0) {
-fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(ret));
+fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(ret));
 abort();
 }
 }
-- 
2.17.1




Re: [PATCH] net: check payload length limit for all frames

2020-07-19 Thread P J P
+-- On Fri, 17 Jul 2020, Li Qiang wrote --+
| P J P wrote on Friday, July 17, 2020 at 5:09 PM:
| > @Alex, would it be possible to share the reproducer on the upstream bug
| > LP#1886362?
| 
| Maybe you mean the reproducer of your patch?

Yes.

| If you or Alex could share it, I'm glad to analysis this issue.

@Alex ...?

Thank you.
--
Prasad J Pandit / Red Hat Product Security Team
8685 545E B54C 486B C6EB 271E E285 8B5A F050 DE8D

[Bug 1888165] Re: loopz/loopnz clearing previous instruction's modified flags on cx -> 0

2020-07-19 Thread Jeffrey
** Attachment added: "source"
   
https://bugs.launchpad.net/qemu/+bug/1888165/+attachment/5394190/+files/loopnzbug.asm

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1888165




[Bug 1888165] Re: loopz/loopnz clearing previous instruction's modified flags on cx -> 0

2020-07-19 Thread Jeffrey
** Attachment added: "bootable image demonstrating bug"
   
https://bugs.launchpad.net/qemu/+bug/1888165/+attachment/5394189/+files/loopnzbug.img

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1888165




[Bug 1888165] [NEW] loopz/loopnz clearing previous instruction's modified flags on cx -> 0

2020-07-19 Thread Jeffrey
Public bug reported:

If you run QBasic in qemu, printing a double-type single-digit number
will print an extra decimal point (e.g. PRINT CDBL(3) prints "3.") that
does not appear when running on a real CPU (or on qemu with -enable-
kvm). I tracked this down to the state of the status flags after a
loopnz instruction.

After executing a sequence like this in qemu:

mov bx,1
mov cx,1
dec bx; sets Z bit in flags
A:  loopnz A  ; should not modify flags

Z is incorrectly clear afterwards. loopz does the same thing (but not
plain loop). Interestingly, inserting pushf+popf after dec results in Z
set, so loopnz/loopz does not always clear Z itself but is rather
interfering with the previous instruction's flag setting.

Version 5.1.0-rc0, x86-64 host.
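
For completeness, a user-space variant of the same sequence that can be
compiled and run under TCG (e.g. qemu-x86_64), a hedged sketch adapted from
the 16-bit example above using 64-bit registers (it is not the attached
bootable image):

#include <stdio.h>

int main(void)
{
    unsigned char zf;

    __asm__ volatile(
        "movq $1, %%rbx\n\t"
        "movq $1, %%rcx\n\t"
        "decq %%rbx\n\t"      /* rbx -> 0, sets ZF */
        "1: loopnz 1b\n\t"    /* rcx -> 0, loop not taken; must not modify flags */
        "setz %0\n\t"
        : "=r"(zf)
        :
        : "rbx", "rcx", "cc");

    printf("ZF after loopnz: %u (expected 1)\n", zf);
    return zf == 1 ? 0 : 1;
}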

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1888165




[Bug 1880287] Re: gcc crashes in hppa emulation

2020-07-19 Thread Helge Deller
Sven Schnelle (sv...@stackframe.org) noticed that increasing
-#define TCG_MAX_TEMPS 512
+#define TCG_MAX_TEMPS 1024
in include/tcg/tcg.h prevents that crash.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1880287

Title:
  gcc crashes in hppa emulation

Status in QEMU:
  New

Bug description:
  There seems to be a translation bug in the qemu-hppa (qemu v5.0.0) emulation:
  A stripped down testcase (taken from Linux kernel build) is attached.

  In there is "a.sh", a shell script which calls gcc-9 (fails with both
  debian gcc-9.3.0-11 or gcc-9.3.0-12). and "a.iii", the preprocessed
  source.

  When starting a.sh, in the emulation gcc crashes with segfault.
  On real hardware gcc succeeds to compile the source.

  In a hppa-user chroot running "apt update && apt install gcc-9" should
  be sufficient to get the needed reproducer environment.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1880287/+subscriptions



QEMU | Pipeline #168317253 has failed for master | 9fc87111

2020-07-19 Thread GitLab via


Your pipeline has failed.

Project: QEMU ( https://gitlab.com/qemu-project/qemu )
Branch: master ( https://gitlab.com/qemu-project/qemu/-/commits/master )

Commit: 9fc87111 ( 
https://gitlab.com/qemu-project/qemu/-/commit/9fc87111005e8903785db40819af66b8f85b8b96
 )
Commit Message: Merge remote-tracking branch 'remotes/rth/tags/...
Commit Author: Peter Maydell ( https://gitlab.com/pm215 )

Pipeline #168317253 ( 
https://gitlab.com/qemu-project/qemu/-/pipelines/168317253 ) triggered by Alex 
Bennée ( https://gitlab.com/stsquad )
had 1 failed build.

Job #645799805 ( https://gitlab.com/qemu-project/qemu/-/jobs/645799805/raw )

Stage: build
Name: build-fuzzer
Trace: ==1==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 2359296 byte(s) in 9 object(s) allocated from:
#0 0x5570060105d7 in calloc 
(/builds/qemu-project/qemu/build/x86_64-softmmu/qemu-system-x86_64+0x2bdb5d7)
#1 0x55700605ddf9 in bitmap_try_new 
/builds/qemu-project/qemu/include/qemu/bitmap.h:96:12
#2 0x55700605ddf9 in bitmap_new 
/builds/qemu-project/qemu/include/qemu/bitmap.h:101:26
#3 0x55700605ddf9 in dirty_memory_extend 
/builds/qemu-project/qemu/exec.c:2219:37
#4 0x55700605ddf9 in ram_block_add /builds/qemu-project/qemu/exec.c:2268:9
#5 0x5570060611b4 in qemu_ram_alloc_internal 
/builds/qemu-project/qemu/exec.c:2441:5
#6 0x557006061567 in qemu_ram_alloc /builds/qemu-project/qemu/exec.c:2460:12
#7 0x55700675d350 in memory_region_init_ram_shared_nomigrate 
/builds/qemu-project/qemu/softmmu/memory.c:1514:21
#8 0x557006bdd127 in ram_backend_memory_alloc 
/builds/qemu-project/qemu/backends/hostmem-ram.c:30:5
#9 0x557006bd9733 in host_memory_backend_memory_complete 
/builds/qemu-project/qemu/backends/hostmem.c:333:9
#10 0x557007a20ffc in user_creatable_complete 
/builds/qemu-project/qemu/qom/object_interfaces.c:23:9
#11 0x557007a2178a in user_creatable_add_type 
/builds/qemu-project/qemu/qom/object_interfaces.c:93:10
#12 0x557007a219dc in user_creatable_add_dict 
/builds/qemu-project/qemu/qom/object_interfaces.c:134:11
#13 0x557007ee7eb6 in qmp_dispatch 
/builds/qemu-project/qemu/qapi/qmp-dispatch.c:155:5
#14 0x5570077452a8 in monitor_qmp_dispatch 
/builds/qemu-project/qemu/monitor/qmp.c:145:11
#15 0x55700774411d in monitor_qmp_bh_dispatcher 
/builds/qemu-project/qemu/monitor/qmp.c:234:9
#16 0x557008065c66 in aio_bh_poll 
/builds/qemu-project/qemu/util/async.c:164:13
#17 0x55700800235c in aio_dispatch 
/builds/qemu-project/qemu/util/aio-posix.c:380:5
#18 0x55700806a62c in aio_ctx_dispatch 
/builds/qemu-project/qemu/util/async.c:306:5
#19 0x7f93662807ae in g_main_context_dispatch 
(/lib64/libglib-2.0.so.0+0x527ae)

SUMMARY: AddressSanitizer: 2359296 byte(s) leaked in 9 allocation(s).
/builds/qemu-project/qemu/tests/qtest/libqtest.c:166: kill_qemu() tried to 
terminate QEMU process but encountered exit status 1 (expected 0)
ERROR qmp-cmd-test - too few tests run (expected 51, got 50)
make: *** [/builds/qemu-project/qemu/tests/Makefile.include:650: 
check-qtest-x86_64] Error 1
section_end:1595186229:step_script
ERROR: Job failed: exit code 1








Re: [PULL for-5.1 0/3] tcg patch queue

2020-07-19 Thread Peter Maydell
On Fri, 17 Jul 2020 at 19:16, Richard Henderson
 wrote:
>
> The following changes since commit 95d1fbabae0cd44156ac4b96d512d143ca7dfd5e:
>
>   Merge remote-tracking branch 
> 'remotes/kraxel/tags/fixes-20200716-pull-request' into staging (2020-07-16 
> 18:50:51 +0100)
>
> are available in the Git repository at:
>
>   https://github.com/rth7680/qemu.git tags/pull-tcg-20200717
>
> for you to fetch changes up to ba3c35d9c4026361fd380b269dc6def9510b7166:
>
>   tcg/cpu-exec: precise single-stepping after an interrupt (2020-07-17 
> 11:09:34 -0700)
>
> 
> Fix vector min/max fallback expansion
> Fix singlestep from exception and interrupt

Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/5.1
for any user-visible changes.

-- PMM



Re: [Bug 1880287] Re: gcc crashes in hppa emulation

2020-07-19 Thread svens
On Fri, Jul 17, 2020 at 09:26:50PM -, Helge Deller wrote:
> Test still crashes the VM and chroot with up-to-date debian chroot,
> including updated gcc-9.3.0-14.
> 
> -- 
> You received this bug notification because you are a member of qemu-
> devel-ml, which is subscribed to QEMU.
> https://bugs.launchpad.net/bugs/1880287
> 
> Title:
>   gcc crashes in hppa emulation
> 
> Status in QEMU:
>   New
> 
> Bug description:
>   There seems to be a translation bug in the qemu-hppa (qemu v5.0.0) 
> emulation:
>   A stripped down testcase (taken from Linux kernel build) is attached.
> 
>   In there is "a.sh", a shell script which calls gcc-9 (fails with both
>   debian gcc-9.3.0-11 or gcc-9.3.0-12). and "a.iii", the preprocessed
>   source.
> 
>   When starting a.sh, in the emulation gcc crashes with segfault.
>   On real hardware gcc succeeds to compile the source.
> 
>   In a hppa-user chroot running "apt update && apt install gcc-9" should
>   be sufficient to get the needed reproducer environment.
> 
> To manage notifications about this bug go to:
> https://bugs.launchpad.net/qemu/+bug/1880287/+subscriptions
> 

I reproduced this here and it looks like we're running out of TCG temps:

Thread 3 "qemu-system-hpp" received signal SIGABRT, Aborted.
[Switching to Thread 0x7fcb5700 (LWP 3208)]
__GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
50  ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
#1  0x7fcb680a455b in __GI_abort () at abort.c:79
#2  0x7fcb680a442f in __assert_fail_base
(fmt=0x7fcb6820ab48 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", 
assertion=0x55cc6120e68c "n < 512", file=0x55cc6120c569 
"/home/svens/qemu/tcg/tcg.c", line=1156, function=) at 
assert.c:92
#3  0x7fcb680b3092 in __GI___assert_fail
(assertion=0x55cc6120e68c "n < 512", file=0x55cc6120c569 
"/home/svens/qemu/tcg/tcg.c", line=1156, function=0x55cc6120f768 
<__PRETTY_FUNCTION__.37440> "tcg_temp_alloc") at assert.c:101
#4  0x55cc60cd57ae in tcg_temp_alloc (s=0x7fcadb60) at 
/home/svens/qemu/tcg/tcg.c:1156
#5  0x55cc60cd5bd6 in tcg_temp_new_internal (type=TCG_TYPE_I32, 
temp_local=false) at /home/svens/qemu/tcg/tcg.c:1273
#6  0x55cc60dda222 in tcg_temp_new_i32 () at 
/home/svens/qemu/include/tcg/tcg.h:899
#7  0x55cc60de760c in do_sub (ctx=0x7fcb5fffe2e0, rt=2, in1=0x430, 
in2=0x9e0, is_tsv=false, is_b=false, is_tc=false, cf=0) at 
/home/svens/qemu/target/hppa/translate.c:1247
#8  0x55cc60de7a04 in do_sub_reg (ctx=0x7fcb5fffe2e0, a=0x7fcb5fffe1d0, 
is_tsv=false, is_b=false, is_tc=false) at 
/home/svens/qemu/target/hppa/translate.c:1313
#9  0x55cc60deaca9 in trans_sub (ctx=0x7fcb5fffe2e0, a=0x7fcb5fffe1d0) at 
/home/svens/qemu/target/hppa/translate.c:2647
#10 0x55cc60de18aa in decode (ctx=0x7fcb5fffe2e0, insn=193070082) at 
target/hppa/decode.inc.c:1699
#11 0x55cc60def6db in hppa_tr_translate_insn (dcbase=0x7fcb5fffe2e0, 
cs=0x55cc62065bf0) at /home/svens/qemu/target/hppa/translate.c:4255
#12 0x55cc60d47d6f in translator_loop (ops=0x55cc614789c0 , 
db=0x7fcb5fffe2e0, cpu=0x55cc62065bf0, tb=0x7fcb2f02e180 
, max_insns=512)
at /home/svens/qemu/accel/tcg/translator.c:102
#13 0x55cc60defb9d in gen_intermediate_code (cs=0x55cc62065bf0, 
tb=0x7fcb2f02e180 , max_insns=512) at 
/home/svens/qemu/target/hppa/translate.c:4389
#14 0x55cc60d45eeb in tb_gen_code (cpu=0x55cc62065bf0, pc=3161101733888, 
cs_base=3161095929860, flags=262915, cflags=-16777216) at 
/home/svens/qemu/accel/tcg/translate-all.c:1738
#15 0x55cc60d42452 in tb_find (cpu=0x55cc62065bf0, last_tb=0x0, tb_exit=0, 
cf_mask=0) at /home/svens/qemu/accel/tcg/cpu-exec.c:407
#16 0x55cc60d42d30 in cpu_exec (cpu=0x55cc62065bf0) at 
/home/svens/qemu/accel/tcg/cpu-exec.c:731
#17 0x55cc60dbe7d1 in tcg_cpu_exec (cpu=0x55cc62065bf0) at 
/home/svens/qemu/softmmu/cpus.c:1356
#18 0x55cc60dbeade in qemu_tcg_rr_cpu_thread_fn (arg=0x55cc62065bf0) at 
/home/svens/qemu/softmmu/cpus.c:1458
#19 0x55cc611c98f0 in qemu_thread_start (args=0x55cc6207f6b0) at 
util/qemu-thread-posix.c:521
#20 0x7fcb6824cf27 in start_thread (arg=) at 
pthread_create.c:479
#21 0x7fcb6817c31f in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb)

TCG_MAX_INSNS is 512, and TCG_MAX_TEMPS is also 512. Given the complexity of
emulating the parisc conditions and nullifications, I guess a 1:1 ratio is
just not sufficient.
Increasing TCG_MAX_TEMPS to 1024 solves the issue. I haven't checked how big
the TB is, and how many temps it allocates then.

Regards
Sven




Re: [PATCH-for-5.1] hw/misc/milkymist-pfpu: Fix pFPU region size

2020-07-19 Thread Michael Walle

Hi Philippe,

Am 2020-07-18 11:37, schrieb Philippe Mathieu-Daudé:

The last microcode word (address 0x6000.6ffc) is not reachable.
Correct the programmable FPU I/O size (which is 4 KiB) to be
able to use all the microcode area.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/misc/milkymist-pfpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/misc/milkymist-pfpu.c b/hw/misc/milkymist-pfpu.c
index 516825e83d..4fbe3e8971 100644
--- a/hw/misc/milkymist-pfpu.c
+++ b/hw/misc/milkymist-pfpu.c
@@ -507,7 +507,7 @@ static void milkymist_pfpu_realize(DeviceState
*dev, Error **errp)
 sysbus_init_irq(sbd, &s->irq);

 memory_region_init_io(&s->regs_region, OBJECT(dev), &pfpu_mmio_ops, s,

-"milkymist-pfpu", MICROCODE_END * 4);
+  "milkymist-pfpu", 0x1000);


Could you use one of the MICROCODE_ macros instead? maybe 
(MICROCODE_WORDS * 2)?


With that fixed:
Reviewed-by: Michael Walle 

-michael


 sysbus_init_mmio(sbd, &s->regs_region);
 }




[PATCH v7 2/6] 9pfs: make v9fs_readdir_response_size() public

2020-07-19 Thread Christian Schoenebeck
Rename function v9fs_readdir_data_size() -> v9fs_readdir_response_size()
and make it callable from other units. So far this function is only
used by 9p.c, however subsequent patches require the function to be
callable from another 9pfs unit. And while we're at it, also make it clear
what this function is used for.

Signed-off-by: Christian Schoenebeck 
---
 hw/9pfs/9p.c | 10 --
 hw/9pfs/9p.h |  1 +
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 2ffd96ade9..7a228c4828 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -2313,7 +2313,13 @@ out_nofid:
 pdu_complete(pdu, err);
 }
 
-static size_t v9fs_readdir_data_size(V9fsString *name)
+/**
+ * Returns size required in Rreaddir response for the passed dirent @p name.
+ *
+ * @param name - directory entry's name (i.e. file name, directory name)
+ * @returns required size in bytes
+ */
+size_t v9fs_readdir_response_size(V9fsString *name)
 {
 /*
  * Size of each dirent on the wire: size of qid (13) + size of offset (8)
@@ -2348,7 +2354,7 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, 
V9fsFidState *fidp,
 }
 v9fs_string_init(&name);
 v9fs_string_sprintf(&name, "%s", dent->d_name);
-if ((count + v9fs_readdir_data_size(&name)) > max_count) {
+if ((count + v9fs_readdir_response_size(&name)) > max_count) {
 v9fs_readdir_unlock(&fidp->fs.dir);
 
 /* Ran out of buffer. Set dir back to old position and return */
diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index ee2271663c..561774e843 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -419,6 +419,7 @@ void v9fs_path_init(V9fsPath *path);
 void v9fs_path_free(V9fsPath *path);
 void v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...);
 void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src);
+size_t v9fs_readdir_response_size(V9fsString *name);
 int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
   const char *name, V9fsPath *path);
 int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,
-- 
2.20.1




[PATCH v7 5/6] 9pfs: differentiate readdir lock between 9P2000.u vs. 9P2000.L

2020-07-19 Thread Christian Schoenebeck
Previous patch suggests that it might make sense to use a different mutex
type now while handling readdir requests, depending on the precise
protocol variant, as v9fs_do_readdir_with_stat() (used by 9P2000.u) uses
a CoMutex to avoid deadlocks that might happen with QemuMutex otherwise,
whereas do_readdir_many() (used by 9P2000.L) should better use a
QemuMutex, as the precise behaviour of a failed CoMutex lock on fs driver
side would not be clear.

This patch is just intended as a transitional measure, as the 9P2000.u and
9P2000.L implementations currently differ in where the main logic of
fetching directory entries is located (9P2000.u still being more top-half
focused, while 9P2000.L is already bottom-half focused in regards
to fetching directory entries, that is).

Signed-off-by: Christian Schoenebeck 
---
 hw/9pfs/9p.c |  4 ++--
 hw/9pfs/9p.h | 27 ++-
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index cc4094b971..a0881ddc88 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -314,8 +314,8 @@ static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
 f->next = s->fid_list;
 s->fid_list = f;
 
-v9fs_readdir_init(&f->fs.dir);
-v9fs_readdir_init(&f->fs_reclaim.dir);
+v9fs_readdir_init(s->proto_version, &f->fs.dir);
+v9fs_readdir_init(s->proto_version, &f->fs_reclaim.dir);
 
 return f;
 }
diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index 93b7030edf..3dd1b50b1a 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -197,22 +197,39 @@ typedef struct V9fsXattr
 
 typedef struct V9fsDir {
 DIR *stream;
-CoMutex readdir_mutex;
+P9ProtoVersion proto_version;
+/* readdir mutex type used for 9P2000.u protocol variant */
+CoMutex readdir_mutex_u;
+/* readdir mutex type used for 9P2000.L protocol variant */
+QemuMutex readdir_mutex_L;
 } V9fsDir;
 
 static inline void v9fs_readdir_lock(V9fsDir *dir)
 {
-qemu_co_mutex_lock(&dir->readdir_mutex);
+if (dir->proto_version == V9FS_PROTO_2000U) {
+qemu_co_mutex_lock(&dir->readdir_mutex_u);
+} else {
+qemu_mutex_lock(&dir->readdir_mutex_L);
+}
 }
 
 static inline void v9fs_readdir_unlock(V9fsDir *dir)
 {
-qemu_co_mutex_unlock(&dir->readdir_mutex);
+if (dir->proto_version == V9FS_PROTO_2000U) {
+qemu_co_mutex_unlock(&dir->readdir_mutex_u);
+} else {
+qemu_mutex_unlock(&dir->readdir_mutex_L);
+}
 }
 
-static inline void v9fs_readdir_init(V9fsDir *dir)
+static inline void v9fs_readdir_init(P9ProtoVersion proto_version, V9fsDir 
*dir)
 {
-qemu_co_mutex_init(&dir->readdir_mutex);
+dir->proto_version = proto_version;
+if (proto_version == V9FS_PROTO_2000U) {
+qemu_co_mutex_init(&dir->readdir_mutex_u);
+} else {
+qemu_mutex_init(&dir->readdir_mutex_L);
+}
 }
 
 /**
-- 
2.20.1




[PATCH v7 4/6] 9pfs: T_readdir latency optimization

2020-07-19 Thread Christian Schoenebeck
Make top half really top half and bottom half really bottom half:

Each T_readdir request handling is hopping between threads (main
I/O thread and background I/O driver threads) several times for
every individual directory entry, which sums up to huge latencies
for handling just a single T_readdir request.

Instead of doing that, collect now all required directory entries
(including all potentially required stat buffers for each entry) in
one rush on a background I/O thread from fs driver by calling the
previously added function v9fs_co_readdir_many() instead of
v9fs_co_readdir(), then assemble the entire resulting network
response message for the readdir request on main I/O thread. The
fs driver is still aborting the directory entry retrieval loop
(on the background I/O thread inside of v9fs_co_readdir_many())
as soon as it would exceed the client's requested maximum R_readdir
response size. So this will not introduce a performance penalty on
the other end.

Also: No longer seek initial directory position in v9fs_readdir(),
as this is now handled (more consistently) by
v9fs_co_readdir_many() instead.

Signed-off-by: Christian Schoenebeck 
---
 hw/9pfs/9p.c | 132 ++-
 1 file changed, 58 insertions(+), 74 deletions(-)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 7a228c4828..cc4094b971 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -972,30 +972,6 @@ static int coroutine_fn fid_to_qid(V9fsPDU *pdu, 
V9fsFidState *fidp,
 return 0;
 }
 
-static int coroutine_fn dirent_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
-  struct dirent *dent, V9fsQID *qidp)
-{
-struct stat stbuf;
-V9fsPath path;
-int err;
-
-v9fs_path_init(&path);
-
-err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
-if (err < 0) {
-goto out;
-}
-err = v9fs_co_lstat(pdu, &path, &stbuf);
-if (err < 0) {
-goto out;
-}
-err = stat_to_qid(pdu, &stbuf, qidp);
-
-out:
-v9fs_path_free(&path);
-return err;
-}
-
 V9fsPDU *pdu_alloc(V9fsState *s)
 {
 V9fsPDU *pdu = NULL;
@@ -2328,62 +2304,74 @@ size_t v9fs_readdir_response_size(V9fsString *name)
 return 24 + v9fs_string_size(name);
 }
 
+static void v9fs_free_dirents(struct V9fsDirEnt *e)
+{
+struct V9fsDirEnt *next = NULL;
+
+for (; e; e = next) {
+next = e->next;
+g_free(e->dent);
+g_free(e->st);
+g_free(e);
+}
+}
+
 static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
-int32_t max_count)
+off_t offset, int32_t max_count)
 {
 size_t size;
 V9fsQID qid;
 V9fsString name;
 int len, err = 0;
 int32_t count = 0;
-off_t saved_dir_pos;
 struct dirent *dent;
+struct stat *st;
+struct V9fsDirEnt *entries = NULL;
 
-/* save the directory position */
-saved_dir_pos = v9fs_co_telldir(pdu, fidp);
-if (saved_dir_pos < 0) {
-return saved_dir_pos;
-}
-
-while (1) {
-v9fs_readdir_lock(&fidp->fs.dir);
+/*
+ * inode remapping requires the device id, which in turn might be
+ * different for different directory entries, so if inode remapping is
+ * enabled we have to make a full stat for each directory entry
+ */
+const bool dostat = pdu->s->ctx.export_flags & V9FS_REMAP_INODES;
 
-err = v9fs_co_readdir(pdu, fidp, &dent);
-if (err || !dent) {
-break;
-}
-v9fs_string_init(&name);
-v9fs_string_sprintf(&name, "%s", dent->d_name);
-if ((count + v9fs_readdir_response_size(&name)) > max_count) {
-v9fs_readdir_unlock(&fidp->fs.dir);
+/*
+ * Fetch all required directory entries altogether on a background IO
+ * thread from fs driver. We don't want to do that for each entry
+ * individually, because hopping between threads (this main IO thread
+ * and background IO driver thread) would sum up to huge latencies.
+ */
+count = v9fs_co_readdir_many(pdu, fidp, &entries, offset, max_count,
+ dostat);
+if (count < 0) {
+err = count;
+count = 0;
+goto out;
+}
+count = 0;
 
-/* Ran out of buffer. Set dir back to old position and return */
-v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
-v9fs_string_free(&name);
-return count;
-}
+for (struct V9fsDirEnt *e = entries; e; e = e->next) {
+dent = e->dent;
 
 if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
-/*
- * dirent_to_qid() implies expensive stat call for each entry,
- * we must do that here though since inode remapping requires
- * the device id, which in turn might be different for
- * different entries; we cannot make any assumption to avoid
- * that here.
- */
-err = dirent_to_qid(pdu, fidp, dent, &qid);
+st = e->st;
+ 

[PATCH v7 0/6] 9pfs: readdir optimization

2020-07-19 Thread Christian Schoenebeck
As previously mentioned, I was investigating performance issues with 9pfs.
Raw file read/write of 9pfs is actually quite good, provided that the client
picked a reasonably high msize (maximum message size). I would recommend
logging a warning on the 9p server side if a client attaches with a small
msize that would cause performance issues for that reason.

However, there are other aspects where 9pfs currently performs suboptimally;
readdir handling in particular is extremely slow: a simple readdir request
from a guest typically blocks for several hundred milliseconds or even
several seconds, no matter how powerful the underlying hardware is.
The reason for this performance issue is latency.
Currently 9pfs dispatches a T_readdir request numerous times back and forth
between the main I/O thread and a background I/O thread; in fact it hops
between threads multiple times for every single directory entry during
T_readdir request handling, which in total leads to huge latencies for a
single T_readdir request.

This patch series aims to address this severe performance issue of 9pfs
T_readdir request handling. The actual performance optimization is patch 4.
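
For illustration, here is a crude standalone model of the dispatch
overhead (not part of this series; it stands in a thread create/join
for what is really a coroutine hop to a worker thread, so the absolute
numbers are meaningless, only the per-entry scaling matters):

  /* latency_hops.c - build with: cc latency_hops.c -o latency_hops -lpthread */
  #include <pthread.h>
  #include <stdio.h>
  #include <time.h>

  #define ENTRIES 1000

  /* stand-in for the per-request work done by the fs driver */
  static void *noop_job(void *arg) { return arg; }

  static double ms(struct timespec a, struct timespec b)
  {
      return (b.tv_sec - a.tv_sec) * 1e3 + (b.tv_nsec - a.tv_nsec) / 1e6;
  }

  int main(void)
  {
      struct timespec t0, t1;
      pthread_t th;

      /* one dispatch per directory entry (the current readdir pattern) */
      clock_gettime(CLOCK_MONOTONIC, &t0);
      for (int i = 0; i < ENTRIES; i++) {
          pthread_create(&th, NULL, noop_job, NULL);
          pthread_join(th, NULL);
      }
      clock_gettime(CLOCK_MONOTONIC, &t1);
      printf("per-entry dispatch:      %8.2f ms\n", ms(t0, t1));

      /* one dispatch for the whole batch (the readdir_many idea) */
      clock_gettime(CLOCK_MONOTONIC, &t0);
      pthread_create(&th, NULL, noop_job, NULL);
      pthread_join(th, NULL);
      clock_gettime(CLOCK_MONOTONIC, &t1);
      printf("single batched dispatch: %8.2f ms\n", ms(t0, t1));
      return 0;
  }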

v6->v7:

  * Rebased to master: SHA-1 b442119329

  * Handle directory seeking more consistently by doing it in
do_readdir_many() instead of in v9fs_readdir() [patch 3], [patch 4].

  * Updated API doc on v9fs_co_readdir_many(): make it clear that
v9fs_free_dirents() must always be called, including error cases
[patch 3].

  * New patch: use different lock type for 9p2000.u vs. 9p2000.L
[patch 5].

Unchanged patches: [patch 1], [patch 2], [patch 6].

Message-ID of previous version (v6):
  cover.1587309014.git.qemu_...@crudebyte.com

Message-ID of version with performance benchmark (v4):
  cover.1579567019.git.qemu_...@crudebyte.com

Christian Schoenebeck (6):
  tests/virtio-9p: added split readdir tests
  9pfs: make v9fs_readdir_response_size() public
  9pfs: add new function v9fs_co_readdir_many()
  9pfs: T_readdir latency optimization
  9pfs: differentiate readdir lock between 9P2000.u vs. 9P2000.L
  9pfs: clarify latency of v9fs_co_run_in_worker()

 hw/9pfs/9p.c | 144 -
 hw/9pfs/9p.h |  50 -
 hw/9pfs/codir.c  | 196 +--
 hw/9pfs/coth.h   |  15 ++-
 tests/qtest/virtio-9p-test.c | 108 +++
 5 files changed, 419 insertions(+), 94 deletions(-)

-- 
2.20.1




[PATCH v7 6/6] 9pfs: clarify latency of v9fs_co_run_in_worker()

2020-07-19 Thread Christian Schoenebeck
As we just fixed a severe performance issue with Treaddir request
handling, clarify this overall issue as a comment on
v9fs_co_run_in_worker(), with the intention to hopefully prevent
such performance mistakes in the future (and to help fix other ones
still outstanding).

Signed-off-by: Christian Schoenebeck 
---
 hw/9pfs/coth.h | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h
index fd4a45bc7c..c51289903d 100644
--- a/hw/9pfs/coth.h
+++ b/hw/9pfs/coth.h
@@ -19,7 +19,7 @@
 #include "qemu/coroutine.h"
 #include "9p.h"
 
-/*
+/**
  * we want to use bottom half because we want to make sure the below
  * sequence of events.
  *
@@ -28,6 +28,16 @@
  *   3. Enter the coroutine in the worker thread.
  * we cannot swap step 1 and 2, because that would imply worker thread
  * can enter coroutine while step1 is still running
+ *
+ * @b PERFORMANCE @b CONSIDERATIONS: As a rule of thumb, keep in mind
+ * that hopping between threads adds @b latency! So when handling a
+ * 9pfs request, avoid calling v9fs_co_run_in_worker() too often, because
+ * this might otherwise sum up to a significant, huge overall latency for
+ * providing the response for just a single request. For that reason it
+ * is highly recommended to fetch all data from fs driver with a single
+ * fs driver request on a background I/O thread (bottom half) in one rush
+ * first and then eventually assembling the final response from that data
+ * on main I/O thread (top half).
  */
 #define v9fs_co_run_in_worker(code_block)   \
 do {\
-- 
2.20.1




[PATCH v7 1/6] tests/virtio-9p: added split readdir tests

2020-07-19 Thread Christian Schoenebeck
The previous, already existing 'basic' readdir test simply used a
'count' parameter big enough to retrieve all directory entries with a
single Treaddir request.

In the 3 new 'split' readdir tests added by this patch, directory
entries are retrieved split over several Treaddir requests, by picking
small 'count' parameters which force the server to truncate the
response. So the test client sends as many Treaddir requests as
necessary to get all directory entries.

The following 3 new tests are added (executed in this sequence):

1. Split readdir test with count=512
2. Split readdir test with count=256
3. Split readdir test with count=128

This test case sequence is chosen because the smaller the 'count' value,
the higher the chance of errors in case of implementation bugs on the
server side.

Signed-off-by: Christian Schoenebeck 
---
 tests/qtest/virtio-9p-test.c | 108 +++
 1 file changed, 108 insertions(+)

diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
index 2167322985..de30b717b6 100644
--- a/tests/qtest/virtio-9p-test.c
+++ b/tests/qtest/virtio-9p-test.c
@@ -578,6 +578,7 @@ static bool fs_dirents_contain_name(struct V9fsDirent *e, 
const char* name)
 return false;
 }
 
+/* basic readdir test where reply fits into a single response message */
 static void fs_readdir(void *obj, void *data, QGuestAllocator *t_alloc)
 {
 QVirtio9P *v9p = obj;
@@ -631,6 +632,89 @@ static void fs_readdir(void *obj, void *data, 
QGuestAllocator *t_alloc)
 g_free(wnames[0]);
 }
 
+/* readdir test where overall request is split over several messages */
+static void fs_readdir_split(void *obj, void *data, QGuestAllocator *t_alloc,
+ uint32_t count)
+{
+QVirtio9P *v9p = obj;
+alloc = t_alloc;
+char *const wnames[] = { g_strdup(QTEST_V9FS_SYNTH_READDIR_DIR) };
+uint16_t nqid;
+v9fs_qid qid;
+uint32_t nentries, npartialentries;
+struct V9fsDirent *entries, *tail, *partialentries;
+P9Req *req;
+int fid;
+uint64_t offset;
+
+fs_attach(v9p, NULL, t_alloc);
+
+fid = 1;
+offset = 0;
+entries = NULL;
+nentries = 0;
+tail = NULL;
+
+req = v9fs_twalk(v9p, 0, fid, 1, wnames, 0);
+v9fs_req_wait_for_reply(req, NULL);
+v9fs_rwalk(req, &nqid, NULL);
+g_assert_cmpint(nqid, ==, 1);
+
+req = v9fs_tlopen(v9p, fid, O_DIRECTORY, 0);
+v9fs_req_wait_for_reply(req, NULL);
+v9fs_rlopen(req, &qid, NULL);
+
+/*
+ * send as many Treaddir requests as required to get all directory
+ * entries
+ */
+while (true) {
+npartialentries = 0;
+partialentries = NULL;
+
+req = v9fs_treaddir(v9p, fid, offset, count, 0);
+v9fs_req_wait_for_reply(req, NULL);
+v9fs_rreaddir(req, &count, &npartialentries, &partialentries);
+if (npartialentries > 0 && partialentries) {
+if (!entries) {
+entries = partialentries;
+nentries = npartialentries;
+tail = partialentries;
+} else {
+tail->next = partialentries;
+nentries += npartialentries;
+}
+while (tail->next) {
+tail = tail->next;
+}
+offset = tail->offset;
+} else {
+break;
+}
+}
+
+g_assert_cmpint(
+nentries, ==,
+QTEST_V9FS_SYNTH_READDIR_NFILES + 2 /* "." and ".." */
+);
+
+/*
+ * Check all file names exist in returned entries, ignore their order
+ * though.
+ */
+g_assert_cmpint(fs_dirents_contain_name(entries, "."), ==, true);
+g_assert_cmpint(fs_dirents_contain_name(entries, ".."), ==, true);
+for (int i = 0; i < QTEST_V9FS_SYNTH_READDIR_NFILES; ++i) {
+char *name = g_strdup_printf(QTEST_V9FS_SYNTH_READDIR_FILE, i);
+g_assert_cmpint(fs_dirents_contain_name(entries, name), ==, true);
+g_free(name);
+}
+
+v9fs_free_dirents(entries);
+
+g_free(wnames[0]);
+}
+
 static void fs_walk_no_slash(void *obj, void *data, QGuestAllocator *t_alloc)
 {
 QVirtio9P *v9p = obj;
@@ -793,6 +877,24 @@ static void fs_flush_ignored(void *obj, void *data, 
QGuestAllocator *t_alloc)
 g_free(wnames[0]);
 }
 
+static void fs_readdir_split_128(void *obj, void *data,
+ QGuestAllocator *t_alloc)
+{
+fs_readdir_split(obj, data, t_alloc, 128);
+}
+
+static void fs_readdir_split_256(void *obj, void *data,
+ QGuestAllocator *t_alloc)
+{
+fs_readdir_split(obj, data, t_alloc, 256);
+}
+
+static void fs_readdir_split_512(void *obj, void *data,
+ QGuestAllocator *t_alloc)
+{
+fs_readdir_split(obj, data, t_alloc, 512);
+}
+
 static void register_virtio_9p_test(void)
 {
 qos_add_test("config", "virtio-9p", pci_config, NULL);
@@ -810,6 +912,12 @@ static void register_virtio_9p_test(void)
 qos_add_test("fs/flush/ignored", "virtio-9p", fs_flush_ignored,

[PATCH v7 3/6] 9pfs: add new function v9fs_co_readdir_many()

2020-07-19 Thread Christian Schoenebeck
The newly added function v9fs_co_readdir_many() retrieves multiple
directory entries with a single fs driver request. It is intended to
replace uses of v9fs_co_readdir(); the latter only retrieves a single
directory entry per fs driver request.

The reason for this planned replacement is that for every fs driver
request the coroutine is dispatched from the main I/O thread to a
background I/O thread and eventually dispatched back to the main I/O
thread. Hopping between threads adds latency. So if a 9pfs Treaddir
request reads a large number of directory entries, this currently
sums up to huge latencies of several hundred ms or even more. Using
v9fs_co_readdir_many() instead of v9fs_co_readdir() will therefore
provide significant performance improvements.
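
As a rough standalone illustration of the idea (not the actual 9pfs
code, which uses the V9fsDirEnt type and the fs driver callbacks shown
below), collecting a whole directory into a chained list in one pass
and only consuming it afterwards looks like this:

  /* readdir_many_sketch.c - fetch all entries first, consume afterwards */
  #include <dirent.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  struct ent {
      char *name;        /* copied entry name */
      struct ent *next;  /* chained list; total count is not known upfront */
  };

  int main(int argc, char **argv)
  {
      const char *path = argc > 1 ? argv[1] : ".";
      DIR *dir = opendir(path);
      struct ent *head = NULL, **tail = &head;
      struct dirent *de;

      if (!dir) {
          perror("opendir");
          return 1;
      }
      /* "bottom half": fetch all entries in one rush */
      while ((de = readdir(dir)) != NULL) {
          struct ent *e = malloc(sizeof(*e));
          e->name = strdup(de->d_name);
          e->next = NULL;
          *tail = e;
          tail = &e->next;
      }
      closedir(dir);

      /* "top half": assemble the "response" from the collected list */
      for (struct ent *e = head; e; ) {
          struct ent *next = e->next;
          printf("%s\n", e->name);
          free(e->name);
          free(e);
          e = next;
      }
      return 0;
  }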

Signed-off-by: Christian Schoenebeck 
---
 hw/9pfs/9p.h|  22 ++
 hw/9pfs/codir.c | 196 +---
 hw/9pfs/coth.h  |   3 +
 3 files changed, 210 insertions(+), 11 deletions(-)

diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index 561774e843..93b7030edf 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -215,6 +215,28 @@ static inline void v9fs_readdir_init(V9fsDir *dir)
 qemu_co_mutex_init(&dir->readdir_mutex);
 }
 
+/**
+ * Type for 9p fs drivers' (a.k.a. 9p backends) result of readdir requests,
+ * which is a chained list of directory entries.
+ */
+typedef struct V9fsDirEnt {
+/* mandatory (must not be NULL) information for all readdir requests */
+struct dirent *dent;
+/*
+ * optional (may be NULL): A full stat of each directory entry is just
+ * done if explicitly told to fs driver.
+ */
+struct stat *st;
+/*
+ * instead of an array, directory entries are always returned as
+ * chained list, that's because the amount of entries retrieved by fs
+ * drivers is dependent on the individual entries' name (since response
+ * messages are size limited), so the final amount cannot be estimated
+ * before hand
+ */
+struct V9fsDirEnt *next;
+} V9fsDirEnt;
+
 /*
  * Filled by fs driver on open and other
  * calls.
diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
index 73f9a751e1..07da5d8d70 100644
--- a/hw/9pfs/codir.c
+++ b/hw/9pfs/codir.c
@@ -18,28 +18,202 @@
 #include "qemu/main-loop.h"
 #include "coth.h"
 
+/*
+ * This is solely executed on a background IO thread.
+ */
+static int do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, struct dirent **dent)
+{
+int err = 0;
+V9fsState *s = pdu->s;
+struct dirent *entry;
+
+errno = 0;
+entry = s->ops->readdir(&s->ctx, &fidp->fs);
+if (!entry && errno) {
+*dent = NULL;
+err = -errno;
+} else {
+*dent = entry;
+}
+return err;
+}
+
+/*
+ * TODO: This will be removed for performance reasons.
+ * Use v9fs_co_readdir_many() instead.
+ */
 int coroutine_fn v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
  struct dirent **dent)
 {
 int err;
-V9fsState *s = pdu->s;
 
 if (v9fs_request_cancelled(pdu)) {
 return -EINTR;
 }
-v9fs_co_run_in_worker(
-{
-struct dirent *entry;
+v9fs_co_run_in_worker({
+err = do_readdir(pdu, fidp, dent);
+});
+return err;
+}
+
+/*
+ * This is solely executed on a background IO thread.
+ *
+ * See v9fs_co_readdir_many() (as its only user) below for details.
+ */
+static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
+   struct V9fsDirEnt **entries, off_t offset,
+   int32_t maxsize, bool dostat)
+{
+V9fsState *s = pdu->s;
+V9fsString name;
+int len, err = 0;
+int32_t size = 0;
+off_t saved_dir_pos;
+struct dirent *dent;
+struct V9fsDirEnt *e = NULL;
+V9fsPath path;
+struct stat stbuf;
+
+*entries = NULL;
+v9fs_path_init(&path);
+
+/*
+ * TODO: Here should be a warn_report_once() if lock failed.
+ *
+ * With a good 9p client we should not get into concurrency here,
+ * because a good client would not use the same fid for concurrent
+ * requests. We do the lock here for safety reasons though. However
+ * the client would then suffer performance issues, so better log that
+ * issue here.
+ */
+v9fs_readdir_lock(&fidp->fs.dir);
+
+/* seek directory to requested initial position */
+if (offset == 0) {
+s->ops->rewinddir(&s->ctx, &fidp->fs);
+} else {
+s->ops->seekdir(&s->ctx, &fidp->fs, offset);
+}
+
+/* save the directory position */
+saved_dir_pos = s->ops->telldir(&s->ctx, &fidp->fs);
+if (saved_dir_pos < 0) {
+err = saved_dir_pos;
+goto out;
+}
+
+while (true) {
+/* get directory entry from fs driver */
+err = do_readdir(pdu, fidp, &dent);
+if (err || !dent) {
+break;
+}
 
-errno = 0;
-entry = s->ops->readdir(&s->ctx, &fidp->fs);
-if (!entry && errno) {
+/*
+ * stop this loop as soon as it would exceed the allowed maximum

Re: [PATCH v2 1/4] scripts/tracetool: Fix dtrace generation for macOS

2020-07-19 Thread Philippe Mathieu-Daudé
On 7/17/20 11:35 AM, Roman Bolshakov wrote:
> dtrace USDT is fully supported since OS X 10.6. There are a few
> peculiarities compared to other dtrace flavors.
> 
> 1. It doesn't accept empty files.
> 2. It doesn't recognize bool type but accepts C99 _Bool.
> 3. It converts int8_t * in probe points to char * in
>header files and introduces [-Wpointer-sign] warning.
> 
> Cc: Cameron Esfahani 
> Signed-off-by: Roman Bolshakov 
> ---
>  scripts/tracetool/format/d.py | 15 ++-
>  1 file changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/scripts/tracetool/format/d.py b/scripts/tracetool/format/d.py
> index 0afb5f3f47..353722f89c 100644
> --- a/scripts/tracetool/format/d.py
> +++ b/scripts/tracetool/format/d.py
> @@ -13,6 +13,7 @@ __email__  = "stefa...@redhat.com"
>  
>  
>  from tracetool import out
> +from sys import platform
>  
>  
>  # Reserved keywords from
> @@ -34,7 +35,8 @@ def generate(events, backend, group):
>  
>  # SystemTap's dtrace(1) warns about empty "provider qemu {}" but is happy
>  # with an empty file.  Avoid the warning.
> -if not events:
> +# But dtrace on macOS can't deal with empty files.
> +if not events and platform != "darwin":

or?

>  return
>  
>  out('/* This file is autogenerated by tracetool, do not edit. */'
> @@ -44,6 +46,17 @@ def generate(events, backend, group):
>  for e in events:
>  args = []
>  for type_, name in e.args:
> +if platform == "darwin":
> +# macOS dtrace accepts only C99 _Bool

Why not do that for all platforms?

> +if type_ == 'bool':
> +type_ = '_Bool'
> +if type_ == 'bool *':
> +type_ = '_Bool *'
> +# It converts int8_t * in probe points to char * in header
> +# files and introduces [-Wpointer-sign] warning.
> +# Avoid it by changing probe type to signed char * 
> beforehand.
> +if type_ == 'int8_t *':
> +type_ = 'signed char *'
>  if name in RESERVED_WORDS:
>  name += '_'
>  args.append(type_ + ' ' + name)
> 
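
For reference, a minimal standalone example of point 2 above
(illustration only, not part of the patch): '_Bool' is the C99
built-in type and is always available, while 'bool' is only a macro
provided by <stdbool.h>, which presumably is why dtrace's D parser
accepts the former but not the latter:

  /* bool_vs_Bool.c */
  #include <stdio.h>

  _Bool flag_builtin = 1;   /* C99 built-in type, no header needed */

  #include <stdbool.h>
  bool flag_macro = true;   /* 'bool' only exists after <stdbool.h> */

  int main(void)
  {
      printf("%d %d\n", flag_builtin, flag_macro);
      return 0;
  }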




Re: [PATCH-for-5.1] qdev: Allow to create hotplug device before plugging it to a bus

2020-07-19 Thread no-reply
Patchew URL: https://patchew.org/QEMU/20200719134329.21613-1-f4...@amsat.org/



Hi,

This series failed the docker-mingw@fedora build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#! /bin/bash
export ARCH=x86_64
make docker-image-fedora V=1 NETWORK=1
time make docker-test-mingw@fedora J=14 NETWORK=1
=== TEST SCRIPT END ===




The full log is available at
http://patchew.org/logs/20200719134329.21613-1-f4...@amsat.org/testing.docker-mingw@fedora/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [PATCH-for-5.1] qdev: Allow to create hotplug device before plugging it to a bus

2020-07-19 Thread no-reply
Patchew URL: https://patchew.org/QEMU/20200719134329.21613-1-f4...@amsat.org/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20200719134329.21613-1-f4...@amsat.org
Subject: [PATCH-for-5.1] qdev: Allow to create hotplug device before plugging 
it to a bus

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

fatal: unable to write new index file
warning: Clone succeeded, but checkout failed.
You can inspect what was checked out with 'git status'
and retry the checkout with 'git checkout -f HEAD'

Traceback (most recent call last):
  File "patchew-tester2/src/patchew-cli", line 531, in test_one
git_clone_repo(clone, r["repo"], r["head"], logf, True)
  File "patchew-tester2/src/patchew-cli", line 62, in git_clone_repo
subprocess.check_call(clone_cmd, stderr=logf, stdout=logf)
  File "/opt/rh/rh-python36/root/usr/lib64/python3.6/subprocess.py", line 291, 
in check_call
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['git', 'clone', '-q', 
'/home/patchew2/.cache/patchew-git-cache/httpsgithubcompatchewprojectqemu-3c8cf5a9c21ff8782164d1def7f44bd888713384',
 '/var/tmp/patchew-tester-tmp-3_pdffy_/src']' returned non-zero exit status 128.



The full log is available at
http://patchew.org/logs/20200719134329.21613-1-f4...@amsat.org/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[PATCH-for-5.1] qdev: Allow to create hotplug device before plugging it to a bus

2020-07-19 Thread Philippe Mathieu-Daudé
Commit 510ef98dca made qdev_realize() support bus-less devices,
asserting that the device is either bus-less or created on a bus.
Commit 464a22c757 used qdev_realize() instead of
object_property_set_bool(). Since qdev_realize() now checks for
a bus, it is not possible to create hotplug devices unattached
to any bus anymore.

Fix by only asserting if the device is not hotpluggable.

Fixes: 464a22c757 "qdev: Use qdev_realize() in qdev_device_add()"
Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/core/qdev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 01796823b4..6c5540ecdc 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -393,7 +393,9 @@ bool qdev_realize(DeviceState *dev, BusState *bus, Error 
**errp)
 if (bus) {
 qdev_set_parent_bus(dev, bus);
 } else {
-assert(!DEVICE_GET_CLASS(dev)->bus_type);
+DeviceClass *dc = DEVICE_GET_CLASS(dev);
+
+assert(dc->hotpluggable || !dc->bus_type);
 }
 
 return object_property_set_bool(OBJECT(dev), "realized", true, errp);
-- 
2.21.3




Re: [PATCH v2] fcntl: Add 32bit filesystem mode

2020-07-19 Thread Linus Walleij
On Mon, Jul 6, 2020 at 10:54 AM Linus Walleij  wrote:

> Ted, can you merge this patch?
>
> It seems QEMU is happy and AFICT it uses the approach you want :)

Gentle ping!

Yours,
Linus Walleij



Re: [PATCH 0/2] Fix for write sharing on luks raw images

2020-07-19 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/20200719122059.59843-1-mlevi...@redhat.com/



Hi,

This series failed the docker-quick@centos7 build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#!/bin/bash
make docker-image-centos7 V=1 NETWORK=1
time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1
=== TEST SCRIPT END ===




The full log is available at
http://patchew.org/logs/20200719122059.59843-1-mlevi...@redhat.com/testing.docker-quick@centos7/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[PATCH 2/2] qemu-iotests: add testcase for bz #1857490

2020-07-19 Thread Maxim Levitsky
Test that we can't write-share raw luks images by default,
but we still can with share-rw=on

Signed-off-by: Maxim Levitsky 
---
 tests/qemu-iotests/296 | 44 +-
 tests/qemu-iotests/296.out | 12 +--
 2 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/296 b/tests/qemu-iotests/296
index ec69ec8974..fb7dec88aa 100755
--- a/tests/qemu-iotests/296
+++ b/tests/qemu-iotests/296
@@ -133,6 +133,21 @@ class EncryptionSetupTestCase(iotests.QMPTestCase):
 )
 self.assert_qmp(result, 'return', {})
 
+
+###
+# add virtio-blk consumer for a block device
+def addImageUser(self, vm, id, disk_id, share_rw=False):
+result = vm.qmp('device_add', **
+{
+'driver': 'virtio-blk',
+'id': id,
+'drive': disk_id,
+'share-rw' : share_rw
+}
+)
+
+iotests.log(result)
+
 # close the encrypted block device
 def closeImageQmp(self, vm, id):
 result = vm.qmp('blockdev-del', **{ 'node-name': id })
@@ -159,7 +174,7 @@ class EncryptionSetupTestCase(iotests.QMPTestCase):
 vm.run_job('job0')
 
 # test that when the image opened by two qemu processes,
-# neither of them can update the image
+# neither of them can update the encryption keys
 def test1(self):
 self.createImg(test_img, self.secrets[0]);
 
@@ -193,6 +208,9 @@ class EncryptionSetupTestCase(iotests.QMPTestCase):
 os.remove(test_img)
 
 
+# test that when the image opened by two qemu processes,
+# even if first VM opens it read-only, the second can't update encryption
+# keys
 def test2(self):
 self.createImg(test_img, self.secrets[0]);
 
@@ -226,6 +244,30 @@ class EncryptionSetupTestCase(iotests.QMPTestCase):
 self.closeImageQmp(self.vm1, "testdev")
 os.remove(test_img)
 
+# test that two VMs can't open the same luks image by default
+# and attach it to a guest device
+def test3(self):
+self.createImg(test_img, self.secrets[0]);
+
+self.openImageQmp(self.vm1, "testdev", test_img, self.secrets[0])
+self.addImageUser(self.vm1, "testctrl", "testdev")
+
+self.openImageQmp(self.vm2, "testdev", test_img, self.secrets[0])
+self.addImageUser(self.vm2, "testctrl", "testdev")
+
+
+# test that two VMs can attach the same luks image to a guest device,
+# if both use share-rw=on
+def test4(self):
+self.createImg(test_img, self.secrets[0]);
+
+self.openImageQmp(self.vm1, "testdev", test_img, self.secrets[0])
+self.addImageUser(self.vm1, "testctrl", "testdev", share_rw=True)
+
+self.openImageQmp(self.vm2, "testdev", test_img, self.secrets[0])
+self.addImageUser(self.vm2, "testctrl", "testdev", share_rw=True)
+
+
 
 if __name__ == '__main__':
 # support only raw luks since luks encrypted qcow2 is a proper
diff --git a/tests/qemu-iotests/296.out b/tests/qemu-iotests/296.out
index afb6d2d09d..cb2859a15c 100644
--- a/tests/qemu-iotests/296.out
+++ b/tests/qemu-iotests/296.out
@@ -26,8 +26,16 @@ Job failed: Failed to get shared "consistent read" lock
 {"return": {}}
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}
-..
+Formatting 'TEST_DIR/test.img', fmt=luks size=1048576 key-secret=keysec0 
iter-time=10
+
+{"return": {}}
+{"error": {"class": "GenericError", "desc": "Failed to get \"write\" lock"}}
+Formatting 'TEST_DIR/test.img', fmt=luks size=1048576 key-secret=keysec0 
iter-time=10
+
+{"return": {}}
+{"return": {}}
+
 --
-Ran 2 tests
+Ran 4 tests
 
 OK
-- 
2.26.2




[PATCH 1/2] block/crypto: disallow write sharing by default

2020-07-19 Thread Maxim Levitsky
My commit 'block/crypto: implement the encryption key management'
accidentally allowed raw luks images to be shared between different
qemu processes without an explicit share-rw=on override.
Fix that.
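
For illustration, a simplified standalone model of the permission
masking (plain flags instead of the real block layer BLK_PERM_*
handling):

  /* perm_mask_sketch.c - why OR-ing a fixed mask leaks write sharing */
  #include <stdio.h>

  enum { PERM_WRITE = 1 << 0, PERM_RESIZE = 1 << 1 };

  int main(void)
  {
      unsigned shared = 0;            /* what the parent actually shares */
      unsigned nshared_old = 0;
      unsigned nshared_new = 0;

      nshared_old |= (PERM_WRITE | PERM_RESIZE);           /* old behaviour */
      nshared_new |= shared & (PERM_WRITE | PERM_RESIZE);  /* fixed behaviour */

      printf("old nshared = 0x%x (write sharing leaked)\n", nshared_old);
      printf("new nshared = 0x%x (nothing shared)\n", nshared_new);
      return 0;
  }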

Fixes: bbfdae91fb ("block/crypto: implement the encryption key management")
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1857490

Signed-off-by: Maxim Levitsky 
---
 block/crypto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/crypto.c b/block/crypto.c
index 8725c1bc02..0807557763 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -881,7 +881,7 @@ block_crypto_child_perms(BlockDriverState *bs, BdrvChild *c,
  * For backward compatibility, manually share the write
  * and resize permission
  */
-*nshared |= (BLK_PERM_WRITE | BLK_PERM_RESIZE);
+*nshared |= shared & (BLK_PERM_WRITE | BLK_PERM_RESIZE);
 /*
  * Since we are not fully a format driver, don't always request
  * the read/resize permission but only when explicitly
-- 
2.26.2




[PATCH 0/2] Fix for write sharing on luks raw images

2020-07-19 Thread Maxim Levitsky
A rebase went wrong, and I ended up allowing a luks image
to be opened at the same time by two VMs without any warnings/overrides.

Fix that and also add an iotest to prevent this from happening.

Best regards,
Maxim Levitsky

Maxim Levitsky (2):
  block/crypto: disallow write sharing by default
  qemu-iotests: add testcase for bz #1857490

 block/crypto.c |  2 +-
 tests/qemu-iotests/296 | 44 +-
 tests/qemu-iotests/296.out | 12 +--
 3 files changed, 54 insertions(+), 4 deletions(-)

-- 
2.26.2





various iotests failures apparently due to overly optimistic timeout settings

2020-07-19 Thread Peter Maydell
I just had a bunch of iotests fail on a freebsd VM test run.
I think the machine the VM runs on is sometimes a bit heavily
loaded for I/O, which means the VM can run slowly. This causes
various over-optimistic timeouts in the iotest testsuite to fail
spuriously. I also saw the 030 failure on the netbsd VM.

Can we try to avoid timing-sensitive tests where we can,
and make timeouts longer than 3 seconds where we can't, please?
This kind of intermittent failure makes my merge tests flaky.

Relevant bits of the log below, there are several different
flavours of failure.


  TESTiotest-qcow2: 030 [fail]
QEMU  --
"/home/qemu/qemu-test.oJwuyW/build/tests/qemu-iotests/../../aarch64-softmmu/qemu-system-aarch64"
-nodefaults -display none -machine virt -accel qtest
QEMU_IMG  --
"/home/qemu/qemu-test.oJwuyW/build/tests/qemu-iotests/../../qemu-img"
QEMU_IO   --
"/home/qemu/qemu-test.oJwuyW/build/tests/qemu-iotests/../../qemu-io"
--cache writeback --aio threads -f qcow2
QEMU_NBD  --
"/home/qemu/qemu-test.oJwuyW/build/tests/qemu-iotests/../../qemu-nbd"
IMGFMT-- qcow2 (compat=1.1)
IMGPROTO  -- file
PLATFORM  -- FreeBSD/amd64 freebsd 12.1-RELEASE
TEST_DIR  -- /home/qemu/qemu-test.oJwuyW/build/tests/qemu-iotests/scratch
SOCK_DIR  -- /tmp/tmp.JFidXl5N
SOCKET_SCM_HELPER --

--- /home/qemu/qemu-test.oJwuyW/src/tests/qemu-iotests/030.out
2020-07-19 09:29:05.0 +
+++ /home/qemu/qemu-test.oJwuyW/build/tests/qemu-iotests/030.out.bad
 2020-07-19 09:52:04.256005000 +
@@ -1,5 +1,57 @@
-...
+WARNING:qemu.machine:qemu received signal 9; command:
"/home/qemu/qemu-test.oJwuyW/build/tests/qemu-iotests/../../aarch64-softmmu/qemu-system-aarch64
-display none -vga none -chardev
socket,id=mon,path=/tmp/tmp.JFidXl5N/qemu-24906-monitor.sock -mon
chardev=mon,mode=control -qtest
unix:path=/tmp/tmp.JFidXl5N/qemu-24906-qtest.sock -accel qtest
-nodefaults -display none -machine virt -accel qtest -drive
if=virtio,id=drive0,file=blkdebug::/home/qemu/qemu-test.oJwuyW/build/tests/qemu-iotests/scratch/test.img,format=qcow2,cache=writeback,aio=threads"
+WARNING:qemu.machine:qemu received signal 9; command:
"/home/qemu/qemu-test.oJwuyW/build/tests/qemu-iotests/../../aarch64-softmmu/qemu-system-aarch64
-display none -vga none -chardev
socket,id=mon,path=/tmp/tmp.JFidXl5N/qemu-24906-monitor.sock -mon
chardev=mon,mode=control -qtest
unix:path=/tmp/tmp.JFidXl5N/qemu-24906-qtest.sock -accel qtest
-nodefaults -display none -machine virt -accel qtest -drive
if=virtio,id=drive0,file=blkdebug::/home/qemu/qemu-test.oJwuyW/build/tests/qemu-iotests/scratch/test.img,format=qcow2,cache=writeback,aio=threads,backing.node-name=mid,backing.backing.node-name=base"
+...E..E
+==
+ERROR: test_set_speed (__main__.TestSetSpeed)
 --
+Traceback (most recent call last):
+  File 
"/usr/home/qemu/qemu-test.oJwuyW/src/tests/qemu-iotests/../../python/qemu/machine.py",
line 436, in _do_shutdown
+self._soft_shutdown(has_quit, timeout)
+  File 
"/usr/home/qemu/qemu-test.oJwuyW/src/tests/qemu-iotests/../../python/qemu/machine.py",
line 419, in _soft_shutdown
+self._popen.wait(timeout=timeout)
+  File "/usr/local/lib/python3.7/subprocess.py", line 1019, in wait
+return self._wait(timeout=timeout)
+  File "/usr/local/lib/python3.7/subprocess.py", line 1645, in _wait
+raise TimeoutExpired(self.args, timeout)
+subprocess.TimeoutExpired: Command
'['/home/qemu/qemu-test.oJwuyW/build/tests/qemu-iotests/../../aarch64-softmmu/qemu-system-aarch64',
'-display', 'none', '-vga', 'none', '-chardev',
'socket,id=mon,path=/tmp/tmp.JFidXl5N/qemu-24906-monitor.sock',
'-mon', 'chardev=mon,mode=control', '-qtest',
'unix:path=/tmp/tmp.JFidXl5N/qemu-24906-qtest.sock', '-accel',
'qtest', '-nodefaults', '-display', 'none', '-machine', 'virt',
'-accel', 'qtest', '-drive',
'if=virtio,id=drive0,file=blkdebug::/home/qemu/qemu-test.oJwuyW/build/tests/qemu-iotests/scratch/test.img,format=qcow2,cache=writeback,aio=threads']'
timed out after 3 seconds
+
+The above exception was the direct cause of the following exception:
+
+Traceback (most recent call last):
+  File "030", line 900, in tearDown
+self.vm.shutdown()
+  File 
"/usr/home/qemu/qemu-test.oJwuyW/src/tests/qemu-iotests/../../python/qemu/machine.py",
line 466, in shutdown
+self._do_shutdown(has_quit, timeout=timeout)
+  File 
"/usr/home/qemu/qemu-test.oJwuyW/src/tests/qemu-iotests/../../python/qemu/machine.py",
line 440, in _do_shutdown
+from exc
+qemu.machine.AbnormalShutdown: Could not perform graceful shutdown
+
+==
+ERROR: test_stream_no_op (__main__.TestSingleDrive)
+--
+Traceback (most recent call last):
+  File 

Re: Is traceroute supposed to work in user mode networking (slirp) ?

2020-07-19 Thread Samuel Thibault
Ottavio Caruso, Sun 19 Jul 2020 12:07:21 +0100, wrote:
> On Sun, 19 Jul 2020 at 03:50, Samuel Thibault  wrote:
> > Ottavio Caruso, Tue 14 Jul 2020 12:15:48 +0100, wrote:
> > > I cannot get traceroute to work with standard udp from any of my
> > > guests.
> > >
> > > $ traceroute 8.8.8.8
> > > traceroute to 8.8.8.8 (8.8.8.8), 64 hops max, 40 byte packets
> > >  1  * * *
> >
> > That was because
> >
> > - libslirp was not forwarding the ttl value, thus always set to 64 hops
> > - libslirp was not reporting icmp errors.
> >
> > I had a try at both, and that indeed seems to be fixing the issue:
> > https://gitlab.freedesktop.org/slirp/libslirp/-/merge_requests/48
> > https://gitlab.freedesktop.org/slirp/libslirp/-/merge_requests/49
> >
> > > Any clues? Is this intended behaviour? Any workarounds that don't
> > > involve tap/tun/bridges?
> >
> > Not without updating libslirp with the abovementioned patches.
> 
> Thanks Samuel. I've added a comment on the portal, but for the benefit
> of qemu-devel:
> 
> Applying this patch on the latest qemu (5.0.90),

Did you also apply
https://gitlab.freedesktop.org/slirp/libslirp/-/merge_requests/48 ?

Samuel



Re: [GIT PULL] IPMI updates

2020-07-19 Thread Peter Maydell
On Fri, 17 Jul 2020 at 17:48, Corey Minyard  wrote:
>
> The following changes since commit 95d1fbabae0cd44156ac4b96d512d143ca7dfd5e:
>
>   Merge remote-tracking branch 
> 'remotes/kraxel/tags/fixes-20200716-pull-request' into staging (2020-07-16 
> 18:50:51 +0100)
>
> are available in the Git repository at:
>
>   https://github.com/cminyard/qemu.git tags/for-qemu-ipmi-5
>
> for you to fetch changes up to e3f7320caa1cc08a9b752e555b79abd6218c7c7a:
>
>   ipmi: add SET_SENSOR_READING command (2020-07-17 11:39:46 -0500)
>
> 
> Man page update and new set sensor command
>
> Some minor man page updates for fairly obvious things.
>
> The set sensor command addition has been in the Power group's tree for a
> long time and I have neglected to submit it.
>
> -corey


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/5.1
for any user-visible changes.

-- PMM



[PATCH 0/3] build: fix for SunOS systems.

2020-07-19 Thread David Carlier
David Carlier (3):
  configure: fix for SunOS based systems.
  exec: posix_madvise usage on SunOS.
  contrib: ivshmem client and server build fix for SunOS.

 configure   |  2 +-
 contrib/ivshmem-client/ivshmem-client.c | 12 ++--
 contrib/ivshmem-server/ivshmem-server.c | 12 ++--
 exec.c  |  8 
 4 files changed, 21 insertions(+), 13 deletions(-)

-- 
2.25.4