Re: [PATCH v6 1/5] target/riscv: Add smstateen support

2022-07-27 Thread Mayuresh Chitale
On Mon, 2022-07-25 at 15:11 +0800, Weiwei Li wrote:
> 在 2022/7/24 下午11:39, Mayuresh Chitale 写道:
> > On Fri, 2022-07-22 at 08:31 +0800, Weiwei Li wrote:
> > > 在 2022/7/21 下午11:31, Mayuresh Chitale 写道:
> > > > Smstateen extension specifies a mechanism to close
> > > > the potential covert channels that could cause security issues.
> > > > 
> > > > This patch adds the CSRs defined in the specification and
> > > > the corresponding predicates and read/write functions.
> > > > 
> > > > Signed-off-by: Mayuresh Chitale 
> > > > ---
> > > >target/riscv/cpu.h  |   4 +
> > > >target/riscv/cpu_bits.h |  37 
> > > >target/riscv/csr.c  | 370
> > > > 
> > > >target/riscv/machine.c  |  21 +++
> > > >4 files changed, 432 insertions(+)
> > > > 
> > > > diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> > > > index ffb1a18873..7f8e5b0014 100644
> > > > --- a/target/riscv/cpu.h
> > > > +++ b/target/riscv/cpu.h
> > > > @@ -354,6 +354,9 @@ struct CPUArchState {
> > > >
> > > >/* CSRs for execution enviornment configuration */
> > > >uint64_t menvcfg;
> > > > +uint64_t mstateen[SMSTATEEN_MAX_COUNT];
> > > > +uint64_t hstateen[SMSTATEEN_MAX_COUNT];
> > > > +uint64_t sstateen[SMSTATEEN_MAX_COUNT];
> > > >target_ulong senvcfg;
> > > >uint64_t henvcfg;
> > > >#endif
> > > > @@ -426,6 +429,7 @@ struct RISCVCPUConfig {
> > > >bool ext_zkt;
> > > >bool ext_ifencei;
> > > >bool ext_icsr;
> > > > +bool ext_smstateen;
> > > >bool ext_svinval;
> > > >bool ext_svnapot;
> > > >bool ext_svpbmt;
> > > > diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> > > > index 6be5a9e9f0..56b7c5bed6 100644
> > > > --- a/target/riscv/cpu_bits.h
> > > > +++ b/target/riscv/cpu_bits.h
> > > > @@ -199,6 +199,12 @@
> > > >/* Supervisor Configuration CSRs */
> > > >#define CSR_SENVCFG 0x10A
> > > >
> > > > +/* Supervisor state CSRs */
> > > > +#define CSR_SSTATEEN0   0x10C
> > > > +#define CSR_SSTATEEN1   0x10D
> > > > +#define CSR_SSTATEEN2   0x10E
> > > > +#define CSR_SSTATEEN3   0x10F
> > > > +
> > > >/* Supervisor Trap Handling */
> > > >#define CSR_SSCRATCH0x140
> > > >#define CSR_SEPC0x141
> > > > @@ -242,6 +248,16 @@
> > > >#define CSR_HENVCFG 0x60A
> > > >#define CSR_HENVCFGH0x61A
> > > >
> > > > +/* Hypervisor state CSRs */
> > > > +#define CSR_HSTATEEN0   0x60C
> > > > +#define CSR_HSTATEEN0H  0x61C
> > > > +#define CSR_HSTATEEN1   0x60D
> > > > +#define CSR_HSTATEEN1H  0x61D
> > > > +#define CSR_HSTATEEN2   0x60E
> > > > +#define CSR_HSTATEEN2H  0x61E
> > > > +#define CSR_HSTATEEN3   0x60F
> > > > +#define CSR_HSTATEEN3H  0x61F
> > > > +
> > > >/* Virtual CSRs */
> > > >#define CSR_VSSTATUS0x200
> > > >#define CSR_VSIE0x204
> > > > @@ -283,6 +299,27 @@
> > > >#define CSR_MENVCFG 0x30A
> > > >#define CSR_MENVCFGH0x31A
> > > >
> > > > +/* Machine state CSRs */
> > > > +#define CSR_MSTATEEN0   0x30C
> > > > +#define CSR_MSTATEEN0H  0x31C
> > > > +#define CSR_MSTATEEN1   0x30D
> > > > +#define CSR_MSTATEEN1H  0x31D
> > > > +#define CSR_MSTATEEN2   0x30E
> > > > +#define CSR_MSTATEEN2H  0x31E
> > > > +#define CSR_MSTATEEN3   0x30F
> > > > +#define CSR_MSTATEEN3H  0x31F
> > > > +
> > > > +/* Common defines for all smstateen */
> > > > +#define SMSTATEEN_MAX_COUNT 4
> > > > +#define SMSTATEEN0_CS   (1ULL << 0)
> > > > +#define SMSTATEEN0_FCSR (1ULL << 1)
> > > > +#define SMSTATEEN0_HSCONTXT (1ULL << 57)
> > > > +#define SMSTATEEN0_IMSIC(1ULL << 58)
> > > > +#define SMSTATEEN0_AIA  (1ULL << 59)
> > > > +#define SMSTATEEN0_SVSLCT   (1ULL << 60)
> > > > +#define SMSTATEEN0_HSENVCFG (1ULL << 62)
> > > > +#define SMSTATEEN_STATEN(1ULL << 63)
> > > Maybe  SMSTATEEN_STATEEN better.
> > ok. Will update in the next version.
> > > > +
> > > >/* Enhanced Physical Memory Protection (ePMP) */
> > > >#define CSR_MSECCFG 0x747
> > > >#define CSR_MSECCFGH0x757
> > > > diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> > > > index 235f2a011e..27032a416c 100644
> > > > --- a/target/riscv/csr.c
> > > > +++ b/target/riscv/csr.c
> > > > @@ -339,6 +339,68 @@ static RISCVException
> > > > hmode32(CPURISCVState
> > > > *env, int csrno)
> > > >
> > > >}
> > > >
> > > > +static RISCVException mstateen(CPURISCVState *env, int csrno)
> > > > +{
> > > > +CPUState *cs = env_cpu(env);
> > > > +RISCVCPU *cpu = RISCV_CPU(cs);
> > > > +
> > > > +if (!cpu->cfg.ext_smstateen) {
> > > > +return RISCV_EXCP_ILLEGAL_INST;
> > > > +}
> > > > +
> > > > +return any(env, csrno);
> > > > +}
> > > > +
> > > > +static RISCVException hstateen_pred(CPURISCVState *env, int
> > > > csrno,
> > > > int base)
> > 

Re: VIRTIO_NET_F_MTU not negotiated

2022-07-27 Thread Jason Wang
On Thu, Jul 28, 2022 at 1:39 PM Eli Cohen  wrote:
>
> > From: Jason Wang 
> > Sent: Thursday, July 28, 2022 5:09 AM
> > To: Eli Cohen 
> > Cc: Eugenio Perez Martin ; qemu-devel@nongnu.org; 
> > Michael S. Tsirkin ;
> > virtualizat...@lists.linux-foundation.org
> > Subject: Re: VIRTIO_NET_F_MTU not negotiated
> >
> > On Wed, Jul 27, 2022 at 2:52 PM Eli Cohen  wrote:
> > >
> > > I found out that the reason why I could not enforce the mtu stems from 
> > > the fact that I did not configure max mtu for the net device
> > (e.g. through libvirt ).
> > > Libvirt does not allow this configuration for vdpa devices and probably 
> > > for a reason. The vdpa backend driver has the freedom to do
> > it using its copy of virtio_net_config.
> > >
> > > The code in qemu that is responsible to allow to consider the device MTU 
> > > restriction is here:
> > >
> > > static void virtio_net_device_realize(DeviceState *dev, Error **errp)
> > > {
> > > VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > VirtIONet *n = VIRTIO_NET(dev);
> > > NetClientState *nc;
> > > int i;
> > >
> > > if (n->net_conf.mtu) {
> > > n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
> > > }
> > >
> > > The above code can be interpreted as follows:
> > > if the command line arguments of qemu indicates that mtu should be 
> > > limited, then we would read this mtu limitation from the
> > device (that actual value is ignored).
> > >
> > > I worked around this limitation by unconditionally setting 
> > > VIRTIO_NET_F_MTU in the host features. As said, it only indicates that
> > we should read the actual limitation for the device.
> > >
> > > If this makes sense I can send a patch to fix this.
> >
> > I wonder whether it's worth to bother:
> >
> > 1) mgmt (above libvirt) should have the knowledge to prepare the correct XML
> > 2) it's not specific to MTU, we had other features work like, for
> > example, the multiqueue?
> >
>
>
> Currently libvirt does not recognize setting the mtu through XML for vdpa 
> device. So you mean the fix should go to libvirt?

Probably.

> Furthermore, even if libvirt supports MTU configuration for a vdpa device, 
> the actual value provided will be ignored and the limitation will be taken 
> from what the vdpa device published in its virtio_net_config structure. That 
> makes the XML configuration binary.

Yes, we suffer from a similar issue for "queues=". I think we should
fix qemu by failing the initialization if the value provided by cli
doesn't match what is read from config space.

E.g when mtu=9000 was set by cli but the actual mtu is 1500.

Thanks

>
> > Thanks
>




RE: VIRTIO_NET_F_MTU not negotiated

2022-07-27 Thread Eli Cohen
> From: Jason Wang 
> Sent: Thursday, July 28, 2022 5:09 AM
> To: Eli Cohen 
> Cc: Eugenio Perez Martin ; qemu-devel@nongnu.org; 
> Michael S. Tsirkin ;
> virtualizat...@lists.linux-foundation.org
> Subject: Re: VIRTIO_NET_F_MTU not negotiated
> 
> On Wed, Jul 27, 2022 at 2:52 PM Eli Cohen  wrote:
> >
> > I found out that the reason why I could not enforce the mtu stems from the 
> > fact that I did not configure max mtu for the net device
> (e.g. through libvirt ).
> > Libvirt does not allow this configuration for vdpa devices and probably for 
> > a reason. The vdpa backend driver has the freedom to do
> it using its copy of virtio_net_config.
> >
> > The code in qemu that is responsible to allow to consider the device MTU 
> > restriction is here:
> >
> > static void virtio_net_device_realize(DeviceState *dev, Error **errp)
> > {
> > VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > VirtIONet *n = VIRTIO_NET(dev);
> > NetClientState *nc;
> > int i;
> >
> > if (n->net_conf.mtu) {
> > n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
> > }
> >
> > The above code can be interpreted as follows:
> > if the command line arguments of qemu indicates that mtu should be limited, 
> > then we would read this mtu limitation from the
> device (that actual value is ignored).
> >
> > I worked around this limitation by unconditionally setting VIRTIO_NET_F_MTU 
> > in the host features. As said, it only indicates that
> we should read the actual limitation for the device.
> >
> > If this makes sense I can send a patch to fix this.
> 
> I wonder whether it's worth to bother:
> 
> 1) mgmt (above libvirt) should have the knowledge to prepare the correct XML
> 2) it's not specific to MTU, we had other features work like, for
> example, the multiqueue?
> 


Currently libvirt does not recognize setting the mtu through XML for vdpa 
device. So you mean the fix should go to libvirt?
Furthermore, even if libvirt supports MTU configuration for a vdpa device, the 
actual value provided will be ignored and the limitation will be taken from 
what the vdpa device published in its virtio_net_config structure. That makes 
the XML configuration binary.

> Thanks



Re: [PATCH v3] target/ppc: Implement new wait variants

2022-07-27 Thread Joel Stanley
On Wed, 27 Jul 2022 at 13:49, Daniel Henrique Barboza
 wrote:
>
>
>
> On 7/20/22 10:33, Nicholas Piggin wrote:
> > ISA v2.06 adds new variations of wait, specified by the WC field. These
> > are not all compatible with the prior wait implementation, because they
> > add additional conditions that cause the processor to resume, which can
> > cause software to hang or run very slowly.
>
> So I suppose this is not a new feature, but a bug fix to remediate these hangs
> cause by the incompatibility of the WC field  with other ISA versions. Is
> that right?

That's the case. Nick has some kernel patches that make Linux use the
new opcode:

 https://lore.kernel.org/all/20220720132132.903462-1-npig...@gmail.com/

With these applied the kernel hangs during boot if more than one CPU
is present. I was able to reproduce with ppc64le_defconfig and this
command line:

 qemu-system-ppc64 -M pseries,x-vof=on -cpu POWER10 -smp 2 -nographic
-kernel zImage.pseries -no-reboot

Qemu will exit (as there's no filesystem) if the test "passes", or
hang during boot if it hits the bug.

There's a kernel here with the patches applied in case someone else
wants to test:

https://ozlabs.org/~joel/zImage.pseries-v5.19-rc8-wait-v3

Tested-by: Joel Stanley 

Because of the hang it would be best if we merged the patch as a fix
sooner rather than later.

Cheers,

Joel

> I'm explicitly asking for it because if it's a bug fix it's ok to pick it
> during the freeze. Especially here, given that what you're doing is mostly
> adding no-ops for conditions that we're not covering.
>
> >
> > ISA v3.0 changed the wait opcode and removed the new variants (retaining
> > the WC field but making non-zero values reserved).
> >
> > ISA v3.1 added new WC values to the new wait opcode, and added a PL
> > field.
> >
> > This implements the new wait encoding and supports WC variants with
> > no-op implementations, which provides basic correctness as explained in
> > comments.
> >
> > Signed-off-by: Nicholas Piggin 
> > ---
> > v3:
> > - Add EXTRACT_HELPERs
> > - Reserved fields should be ignored, not trap.
> > - v3.1 defines special case of reserved PL values being treated as
> >a no-op when WC=2.
> > - Change code organization to (hopefully) be easier to follow each
> >ISA / variation.
> > - Tested old wait variant with Linux e6500 boot and verify that
> >gen_wait is called and takes the expected path.
> >
> > Thanks,
> > Nick
> >
> >   target/ppc/internal.h  |  3 ++
> >   target/ppc/translate.c | 96 ++
> >   2 files changed, 91 insertions(+), 8 deletions(-)
> >
> > diff --git a/target/ppc/internal.h b/target/ppc/internal.h
> > index 2add128cd1..57c0a42a6b 100644
> > --- a/target/ppc/internal.h
> > +++ b/target/ppc/internal.h
> > @@ -168,6 +168,9 @@ EXTRACT_HELPER_SPLIT_3(DX, 10, 6, 6, 5, 16, 1, 1, 0, 0)
> >   /* darn */
> >   EXTRACT_HELPER(L, 16, 2);
> >   #endif
> > +/* wait */
> > +EXTRACT_HELPER(WC, 21, 2);
> > +EXTRACT_HELPER(PL, 16, 2);
> >
> >   /***Jump target decoding  
> >  ***/
> >   /* Immediate address */
> > diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> > index 1d6daa4608..e0a835ac90 100644
> > --- a/target/ppc/translate.c
> > +++ b/target/ppc/translate.c
> > @@ -4066,12 +4066,91 @@ static void gen_sync(DisasContext *ctx)
> >   /* wait */
> >   static void gen_wait(DisasContext *ctx)
> >   {
> > -TCGv_i32 t0 = tcg_const_i32(1);
> > -tcg_gen_st_i32(t0, cpu_env,
> > -   -offsetof(PowerPCCPU, env) + offsetof(CPUState, 
> > halted));
> > -tcg_temp_free_i32(t0);
> > -/* Stop translation, as the CPU is supposed to sleep from now */
> > -gen_exception_nip(ctx, EXCP_HLT, ctx->base.pc_next);
> > +uint32_t wc;
> > +
> > +if (ctx->insns_flags & PPC_WAIT) {
> > +/* v2.03-v2.07 define an older incompatible 'wait' encoding. */
> > +
> > +if (ctx->insns_flags2 & PPC2_PM_ISA206) {
> > +/* v2.06 introduced the WC field. WC > 0 may be treated as 
> > no-op. */
> > +wc = WC(ctx->opcode);
> > +} else {
> > +wc = 0;
> > +}
> > +
> > +} else if (ctx->insns_flags2 & PPC2_ISA300) {
> > +/* v3.0 defines a new 'wait' encoding. */
> > +wc = WC(ctx->opcode);
>
>
> The ISA seems to indicate that WC=3 is always reserved in both ISA300 and
> ISA310. I believe you can check for WC=3 and gen_invalid() return right
> off the bat at this point.
>
>
> Thanks,
>
>
> Daniel
>
>
>
> > +if (ctx->insns_flags2 & PPC2_ISA310) {
> > +uint32_t pl = PL(ctx->opcode);
> > +
> > +/* WC 1,2 may be treated as no-op. WC 3 is reserved. */
> > +if (wc == 3) {
> > +gen_invalid(ctx);
> > +return;
> > +}
> > +
> > +/* PL 1-3 are reserved. If WC=2 then the insn is treated as 
> > noop. */
> > +if (pl > 0 && wc != 2) {
> > +

Re: [RFC patch 0/1] block: vhost-blk backend

2022-07-27 Thread Andrey Zhadchenko



On 7/27/22 16:06, Stefano Garzarella wrote:

On Tue, Jul 26, 2022 at 04:15:48PM +0200, Denis V. Lunev wrote:

On 26.07.2022 15:51, Michael S. Tsirkin wrote:

On Mon, Jul 25, 2022 at 11:55:26PM +0300, Andrey Zhadchenko wrote:

Although QEMU virtio-blk is quite fast, there is still some room for
improvements. Disk latency can be reduced if we handle virito-blk 
requests

in host kernel so we avoid a lot of syscalls and context switches.

The biggest disadvantage of this vhost-blk flavor is raw format.
Luckily Kirill Thai proposed device mapper driver for QCOW2 format 
to attach
files as block devices: 
https://www.spinics.net/lists/kernel/msg4292965.html

That one seems stalled. Do you plan to work on that too?

We have too. The difference in numbers, as you seen below is quite too
much. We have waited for this patch to be sent to keep pushing.

It should be noted that may be talk on OSS this year could also push a 
bit.


Cool, the results are similar of what I saw when I compared vhost-blk 
and io_uring passthrough with NVMe (Slide 7 here: [1]).


About QEMU block layer support, we recently started to work on libblkio 
[2]. Stefan also sent an RFC [3] to implement the QEMU BlockDriver.

Currently it supports virtio-blk devices using vhost-vdpa and vhost-user.
We could add support for vhost (kernel) as well, though, we were 
thinking of leveraging vDPA to implement in-kernel software device as well.


That way we could reuse a lot of the code to support both hardware and 
software accelerators.


In the talk [1] I describe the idea a little bit, and a few months ago I 
did a PoC (unsubmitted RFC) to see if it was feasible and the numbers 
were in line with vhost-blk.


Do you think we could join forces and just have an in-kernel vdpa-blk 
software device?


This seems worth trying. Why double the efforts to do the same. Yet I 
would like to play a bit with your vdpa-blk PoC beforehand. Can you send 
it to me with some instructions how to run it?




Of course we could have both vhost-blk and vdpa-blk, but with vDPA 
perhaps we can have one software stack to maintain for both HW and 
software accelerators.


Thanks,
Stefano

[1] 
https://kvmforum2021.sched.com/event/ke3a/vdpa-blk-unified-hardware-and-software-offload-for-virtio-blk-stefano-garzarella-red-hat 


[2] https://gitlab.com/libblkio/libblkio
[3] 
https://lore.kernel.org/qemu-devel/20220708041737.1768521-1-stefa...@redhat.com/ 







Re: [PATCH 08/16] vhost: add op to enable or disable a single vring

2022-07-27 Thread Jason Wang
On Wed, Jul 27, 2022 at 3:05 PM Kangjie Xu  wrote:
>
>
> 在 2022/7/27 12:55, Jason Wang 写道:
> > On Tue, Jul 26, 2022 at 2:39 PM Kangjie Xu  
> > wrote:
> >>
> >> 在 2022/7/26 11:49, Jason Wang 写道:
> >>> 在 2022/7/18 19:17, Kangjie Xu 写道:
>  The interface to set enable status for a single vring is lacked in
>  VhostOps, since the vhost_set_vring_enable_op will manipulate all
>  virtqueues in a device.
> 
>  Resetting a single vq will rely on this interface. It requires a
>  reply to indicate that the reset operation is finished, so the
>  parameter, wait_for_reply, is added.
> >>>
> >>> The wait reply seems to be a implementation specific thing. Can we
> >>> hide it?
> >>>
> >>> Thanks
> >>>
> >> I do not hide wait_for_reply here because when stopping the queue, a
> >> reply is needed to ensure that the message has been processed and queue
> >> has been disabled.
> > This needs to be done at vhost-backend level instead of the general vhost 
> > code.
> >
> > E.g vhost-kernel or vDPA is using ioctl() which is synchronous.
> Yeah, I understand your concerns, will fix it in the next version.
> >> When restarting the queue, we do not need a reply, which is the same as
> >> what qemu does in vhost_dev_start().
> >>
> >> So I add this parameter to distinguish the two cases.
> >>
> >> I think one alternative implementation is to add a interface in
> >> VhostOps: queue_reset(). In this way details can be hidden. What do you
> >> think of this solution? Does it look better?
> > Let me ask it differently, under which case can we call this function
> > with wait_for_reply = false?
> >
> > Thanks
>
> Cases when we do not need to wait until that the reply has been
> processed. Actually in dev_start(), or dev_stop(),

But we don't need to send virtqueue reset requests in those cases?

> they do not wait for
> replies when enabling/disabling vqs.
>
> Specifically, vhost_set_vring_enable() will call it with wait_for_reply
> = false.
>
> In our vq reset scenario, it should be synchronous because the driver
> need to modify queues after the device stop using the vq in the backend.
> Undefined errors will occur If the device is still using the queue when
> the driver is making modifications to it,
>
> Back to our implementation, we will not expose this parameter in the
> next version.

Ok.

Thanks

>
> Thanks.
>
> >> Thanks
> >>
>  Signed-off-by: Kangjie Xu 
>  Signed-off-by: Xuan Zhuo 
>  ---
> include/hw/virtio/vhost-backend.h | 4 
> 1 file changed, 4 insertions(+)
> 
>  diff --git a/include/hw/virtio/vhost-backend.h
>  b/include/hw/virtio/vhost-backend.h
>  index eab46d7f0b..7bddd1e9a0 100644
>  --- a/include/hw/virtio/vhost-backend.h
>  +++ b/include/hw/virtio/vhost-backend.h
>  @@ -81,6 +81,9 @@ typedef int (*vhost_set_backend_cap_op)(struct
>  vhost_dev *dev);
> typedef int (*vhost_set_owner_op)(struct vhost_dev *dev);
> typedef int (*vhost_reset_device_op)(struct vhost_dev *dev);
> typedef int (*vhost_get_vq_index_op)(struct vhost_dev *dev, int idx);
>  +typedef int (*vhost_set_single_vring_enable_op)(struct vhost_dev *dev,
>  +int index, int enable,
>  +bool wait_for_reply);
> typedef int (*vhost_set_vring_enable_op)(struct vhost_dev *dev,
>  int enable);
> typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev);
>  @@ -155,6 +158,7 @@ typedef struct VhostOps {
> vhost_set_owner_op vhost_set_owner;
> vhost_reset_device_op vhost_reset_device;
> vhost_get_vq_index_op vhost_get_vq_index;
>  +vhost_set_single_vring_enable_op vhost_set_single_vring_enable;
> vhost_set_vring_enable_op vhost_set_vring_enable;
> vhost_requires_shm_log_op vhost_requires_shm_log;
> vhost_migration_done_op vhost_migration_done;
>




Re: VIRTIO_NET_F_MTU not negotiated

2022-07-27 Thread Jason Wang
On Wed, Jul 27, 2022 at 2:52 PM Eli Cohen  wrote:
>
> I found out that the reason why I could not enforce the mtu stems from the 
> fact that I did not configure max mtu for the net device (e.g. through 
> libvirt ).
> Libvirt does not allow this configuration for vdpa devices and probably for a 
> reason. The vdpa backend driver has the freedom to do it using its copy of 
> virtio_net_config.
>
> The code in qemu that is responsible to allow to consider the device MTU 
> restriction is here:
>
> static void virtio_net_device_realize(DeviceState *dev, Error **errp)
> {
> VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> VirtIONet *n = VIRTIO_NET(dev);
> NetClientState *nc;
> int i;
>
> if (n->net_conf.mtu) {
> n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
> }
>
> The above code can be interpreted as follows:
> if the command line arguments of qemu indicates that mtu should be limited, 
> then we would read this mtu limitation from the device (that actual value is 
> ignored).
>
> I worked around this limitation by unconditionally setting VIRTIO_NET_F_MTU 
> in the host features. As said, it only indicates that we should read the 
> actual limitation for the device.
>
> If this makes sense I can send a patch to fix this.

I wonder whether it's worth to bother:

1) mgmt (above libvirt) should have the knowledge to prepare the correct XML
2) it's not specific to MTU, we had other features work like, for
example, the multiqueue?

Thanks




Re: [RFC v4 2/9] qemu-io: add zoned block device operations.

2022-07-27 Thread Damien Le Moal
On 7/27/22 23:13, Stefan Hajnoczi wrote:
> On Mon, 11 Jul 2022 at 22:17, Sam Li  wrote:
>> +int bdrv_co_zone_report(BlockDriverState *bs, int64_t offset,
>> +int64_t *nr_zones,
>> +BlockZoneDescriptor *zones)
>> +{
>> +BlockDriver *drv = bs->drv;
>> +CoroutineIOCompletion co = {
>> +.coroutine = qemu_coroutine_self(),
>> +};
>> +IO_CODE();
>> +
>> +bdrv_inc_in_flight(bs);
>> +if (!drv || (!drv->bdrv_co_zone_report)) {

You can drop the inner parenthesis for "(!drv->bdrv_co_zone_report)".

>> +co.ret = -ENOTSUP;
>> +goto out;
>> +}
>> +
>> +if (drv->bdrv_co_zone_report) {
> 
> At this point we know drv->bdrv_co_zone_report is non-NULL because it
> has been checked already. The if statement can be dropped.
> 
>> +co.ret = drv->bdrv_co_zone_report(bs, offset, nr_zones, zones);
>> +} else {
>> +co.ret = -ENOTSUP;
> 
> This case is already handled by if (... ||
> (!drv->bdrv_co_zone_report)) above. The else body can be dropped.
> 
>> +goto out;
>> +qemu_coroutine_yield();
>> +}
>> +
>> +out:
>> +bdrv_dec_in_flight(bs);
>> +return co.ret;
>> +}
>> +
>> +int bdrv_co_zone_mgmt(BlockDriverState *bs, enum zone_op op,
> 
> Please follow QEMU coding style and typedef BdrvZoneOp instead of
> writing out enum zone_op.
> 
>> +int64_t offset, int64_t len)
>> +{
>> +BlockDriver *drv = bs->drv;
>> +CoroutineIOCompletion co = {
>> +.coroutine = qemu_coroutine_self(),
>> +};
>> +IO_CODE();
>> +
>> +bdrv_inc_in_flight(bs);
>> +if (!drv || (!drv->bdrv_co_zone_mgmt)) {
>> +co.ret = -ENOTSUP;
>> +goto out;
>> +}
>> +
>> +if (drv->bdrv_co_zone_mgmt) {
>> +co.ret = drv->bdrv_co_zone_mgmt(bs, op, offset, len);
>> +} else {
>> +co.ret = -ENOTSUP;
>> +goto out;
>> +qemu_coroutine_yield();
>> +}
> 
> Same comments here.
> 
>> +
>> +out:
>> +bdrv_dec_in_flight(bs);
>> +return co.ret;
>> +}
>> +
>>  void *qemu_blockalign(BlockDriverState *bs, size_t size)
>>  {
>>  IO_CODE();
>> diff --git a/include/block/block-io.h b/include/block/block-io.h
>> index 053a27141a..a0ae140452 100644
>> --- a/include/block/block-io.h
>> +++ b/include/block/block-io.h
>> @@ -80,6 +80,13 @@ int bdrv_co_ioctl(BlockDriverState *bs, int req, void 
>> *buf);
>>  /* Ensure contents are flushed to disk.  */
>>  int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
>>
>> +/* Report zone information of zone block device. */
>> +int coroutine_fn bdrv_co_zone_report(BlockDriverState *bs, int64_t offset,
>> + int64_t *nr_zones,
>> + BlockZoneDescriptor *zones);
>> +int coroutine_fn bdrv_co_zone_mgmt(BlockDriverState *bs, zone_op op,
>> +   int64_t offset, int64_t len);
>> +
>>  int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
>>  bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
>>  int bdrv_block_status(BlockDriverState *bs, int64_t offset,
>> @@ -289,6 +296,12 @@ bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector 
>> *qiov, int64_t pos);
>>  int generated_co_wrapper
>>  bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
>>
>> +int generated_co_wrapper
>> +blk_zone_report(BlockBackend *blk, int64_t offset, int64_t *nr_zones,
>> +BlockZoneDescriptor *zones);
>> +int generated_co_wrapper
>> +blk_zone_mgmt(BlockBackend *blk, enum zone_op op, int64_t offset, int64_t 
>> len);
>> +
>>  /**
>>   * bdrv_parent_drained_begin_single:
>>   *
>> diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
>> index 2f0d8ac25a..a88fa322d2 100644
>> --- a/qemu-io-cmds.c
>> +++ b/qemu-io-cmds.c
>> @@ -1706,6 +1706,144 @@ static const cmdinfo_t flush_cmd = {
>>  .oneline= "flush all in-core file state to disk",
>>  };
>>
>> +static int zone_report_f(BlockBackend *blk, int argc, char **argv)
>> +{
>> +int ret;
>> +int64_t offset, nr_zones;
>> +
>> +++optind;
>> +offset = cvtnum(argv[optind]);
>> +++optind;
>> +nr_zones = cvtnum(argv[optind]);
>> +
>> +g_autofree BlockZoneDescriptor *zones = NULL;
>> +zones = g_new(BlockZoneDescriptor, nr_zones);
>> +ret = blk_zone_report(blk, offset, _zones, zones);
>> +if (ret < 0) {
>> +printf("zone report failed: %s\n", strerror(-ret));
>> +} else {
>> +for (int i = 0; i < nr_zones; ++i) {
>> +printf("start: 0x%" PRIx64 ", len 0x%" PRIx64 ", "
>> +   "cap"" 0x%" PRIx64 ",wptr 0x%" PRIx64 ", "
>> +   "zcond:%u, [type: %u]\n",
>> +   zones[i].start, zones[i].length, zones[i].cap, 
>> zones[i].wp,
>> +   zones[i].cond, zones[i].type);
>> +}
>> +}
>> +return ret;
>> +}
>> +
>> +static const cmdinfo_t zone_report_cmd = {
>> +.name = "zone_report",
>> + 

Re: virtio: why no full reset on virtio_set_status 0 ?

2022-07-27 Thread Jason Wang
On Wed, Jul 27, 2022 at 11:32 PM Michael S. Tsirkin  wrote:
>
> On Wed, Jul 27, 2022 at 12:51:31PM +0200, Claudio Fontana wrote:
> > Hi Michael and all,
> >
> > I have started researching a qemu / ovs / dpdk bug:
> >
> > https://inbox.dpdk.org/dev/322122fb-619d-96f6-5c3e-9eabdbf38...@redhat.com/T/
> >
> > that seems to be affecting multiple parties in the telco space,
> >
> > and during this process I noticed that qemu/hw/virtio/virtio.c does not do 
> > a full virtio reset
> > in virtio_set_status, when receiving a status value of 0.
> >
> > It seems it has always been this way, so I am clearly missing / forgetting 
> > something basic,
> >
> > I checked the virtio spec at https://docs.oasis-open.org/
> >
> > and from:
> >
> > "
> > 4.1.4.3 Common configuration structure layout
> >
> > device_status
> > The driver writes the device status here (see 2.1). Writing 0 into this 
> > field resets the device.
> >
> > "
> >
> > and
> >
> > "
> > 2.4.1 Device Requirements: Device Reset
> > A device MUST reinitialize device status to 0 after receiving a reset.
> > "
> >
> > I would conclude that in virtio.c::virtio_set_status we should 
> > unconditionally do a full virtio_reset.
> >
> > Instead, we have just the check:
> >
> > if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
> > (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
> > virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
> > }
> >
> > which just sets the started field,
> >
> > and then we have the call to the virtio device class set_status 
> > (virtio_net...),
> > but the VirtioDevice is not fully reset, as per the virtio_reset() call we 
> > are missing:
> >
> > "
> > vdev->start_on_kick = false;
> > vdev->started = false;
> > vdev->broken = false;
> > vdev->guest_features = 0;
> > vdev->queue_sel = 0;
> > vdev->status = 0;
> > vdev->disabled = false;
> > qatomic_set(>isr, 0);
> > vdev->config_vector = VIRTIO_NO_VECTOR;
> > virtio_notify_vector(vdev, vdev->config_vector);
> >
> > for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
> > ... initialize vdev->vq[i] ...
> > }
> > "
> >
> > Doing a full reset seems to fix the problem for me, so I can send tentative 
> > patches if necessary,
> > but what am I missing here?
> >
> > Thanks,
> >
> > Claudio
> >
> > --
> > Claudio Fontana
> > Engineering Manager Virtualization, SUSE Labs Core
> >
> > SUSE Software Solutions Italy Srl
>
>
> So for example for pci:
>
> case VIRTIO_PCI_STATUS:
>
>
> 
>
> if (vdev->status == 0) {
> virtio_pci_reset(DEVICE(proxy));
> }
>
> which I suspect is a bug because:
>
> static void virtio_pci_reset(DeviceState *qdev)
> {
> VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
> VirtioBusState *bus = VIRTIO_BUS(>bus);
> PCIDevice *dev = PCI_DEVICE(qdev);
> int i;
>
> virtio_bus_reset(bus);

Note that we do virtio_reset() here.

> msix_unuse_all_vectors(>pci_dev);
>
> for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
> proxy->vqs[i].enabled = 0;
> proxy->vqs[i].num = 0;
> proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0;
> proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
> proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0;
> }
>
>
> so far so good
>
> if (pci_is_express(dev)) {
> pcie_cap_deverr_reset(dev);
> pcie_cap_lnkctl_reset(dev);
>
> pci_set_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL, 0);
> }
>
> this part is wrong I think, it got here by mistake since the same
> function is used for bus level reset.
>
> Jason, Marcel, any input?

Yes, I think we don't need PCI stuff here. We do virtio reset not pci.

Thanks

>
> --
> MST
>




[PULL 0/2] riscv-to-apply queue

2022-07-27 Thread Alistair Francis
From: Alistair Francis 

The following changes since commit 7b17a1a841fc2336eba53afade9cadb14bd3dd9a:

  Update version for v7.1.0-rc0 release (2022-07-26 18:03:16 -0700)

are available in the Git repository at:

  g...@github.com:alistair23/qemu.git tags/pull-riscv-to-apply-20220728

for you to fetch changes up to 54f218363052be210e77d2ada8c0c1e51b3ad6cd:

  hw/intc: sifive_plic: Fix multi-socket plic configuraiton (2022-07-28 
09:08:44 +1000)


Sixth RISC-V PR for QEMU 7.1

This is a PR to go in for RC1. It fixes a segfault that occurs
when using multiple sockets on the RISC-V virt board. It also
includes a small fix to allow both Zmmul and M extensions.

* Allow both Zmmul and M extension
* Fix multi-socket plic configuraiton


Atish Patra (1):
  hw/intc: sifive_plic: Fix multi-socket plic configuraiton

Palmer Dabbelt (1):
  RISC-V: Allow both Zmmul and M

 hw/intc/sifive_plic.c | 4 ++--
 target/riscv/cpu.c| 5 -
 2 files changed, 2 insertions(+), 7 deletions(-)



[PULL 2/2] hw/intc: sifive_plic: Fix multi-socket plic configuraiton

2022-07-27 Thread Alistair Francis
From: Atish Patra 

Since commit 40244040a7ac, multi-socket configuration with plic is
broken as the hartid for second socket is calculated incorrectly.
The hartid stored in addr_config already includes the offset
for the base hartid for that socket. Adding it again would lead
to segfault while creating the plic device for the virt machine.
qdev_connect_gpio_out was also invoked with incorrect number of gpio
lines.

Fixes: 40244040a7ac (hw/intc: sifive_plic: Avoid overflowing the addr_config 
buffer)

Signed-off-by: Atish Patra 
Reviewed-by: Alistair Francis 
Message-Id: <20220723090335.671105-1-ati...@rivosinc.com>
[ Changes by AF:
 - Change the qdev_connect_gpio_out() numbering
]
Signed-off-by: Alistair Francis 
---
 hw/intc/sifive_plic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/intc/sifive_plic.c b/hw/intc/sifive_plic.c
index 56d60e9ac9..af4ae3630e 100644
--- a/hw/intc/sifive_plic.c
+++ b/hw/intc/sifive_plic.c
@@ -454,10 +454,10 @@ DeviceState *sifive_plic_create(hwaddr addr, char 
*hart_config,
 
 for (i = 0; i < plic->num_addrs; i++) {
 int cpu_num = plic->addr_config[i].hartid;
-CPUState *cpu = qemu_get_cpu(hartid_base + cpu_num);
+CPUState *cpu = qemu_get_cpu(cpu_num);
 
 if (plic->addr_config[i].mode == PLICMode_M) {
-qdev_connect_gpio_out(dev, num_harts + cpu_num,
+qdev_connect_gpio_out(dev, num_harts - plic->hartid_base + cpu_num,
   qdev_get_gpio_in(DEVICE(cpu), IRQ_M_EXT));
 }
 if (plic->addr_config[i].mode == PLICMode_S) {
-- 
2.37.1




[PULL 1/2] RISC-V: Allow both Zmmul and M

2022-07-27 Thread Alistair Francis
From: Palmer Dabbelt 

We got to talking about how Zmmul and M interact with each other
https://github.com/riscv/riscv-isa-manual/issues/869 , and it turns out
that QEMU's behavior is slightly wrong: having Zmmul and M is a legal
combination, it just means that the multiplication instructions are
supported even when M is disabled at runtime via misa.

This just stops overriding M from Zmmul, with that the other checks for
the multiplication instructions work as per the ISA.

Signed-off-by: Palmer Dabbelt 
Reviewed-by: Alistair Francis 
Message-Id: <20220714180033.22385-1-pal...@rivosinc.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/cpu.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 1bb3973806..ac6f82ebd0 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -619,11 +619,6 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 cpu->cfg.ext_ifencei = true;
 }
 
-if (cpu->cfg.ext_m && cpu->cfg.ext_zmmul) {
-warn_report("Zmmul will override M");
-cpu->cfg.ext_m = false;
-}
-
 if (cpu->cfg.ext_i && cpu->cfg.ext_e) {
 error_setg(errp,
"I and E extensions are incompatible");
-- 
2.37.1




Re: [PATCH v11 2/6] target/riscv: Simplify counter predicate function

2022-07-27 Thread Weiwei Li


在 2022/7/28 上午5:40, Atish Kumar Patra 写道:



On Wed, Jul 27, 2022 at 1:35 AM Weiwei Li > wrote:



在 2022/7/27 下午2:49, Atish Patra 写道:
> All the hpmcounters and the fixed counters (CY, IR, TM) can be
represented
> as a unified counter. Thus, the predicate function doesn't need
handle each
> case separately.
>
> Simplify the predicate function so that we just handle things
differently
> between RV32/RV64 and S/HS mode.
>
> Reviewed-by: Bin Meng mailto:bmeng...@gmail.com>>
> Acked-by: Alistair Francis mailto:alistair.fran...@wdc.com>>
> Signed-off-by: Atish Patra mailto:ati...@rivosinc.com>>
> ---
>   target/riscv/csr.c | 112
+
>   1 file changed, 11 insertions(+), 101 deletions(-)
>
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 1233bfa0a726..57dbbf9b09a0 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -74,6 +74,7 @@ static RISCVException ctr(CPURISCVState *env,
int csrno)
>       CPUState *cs = env_cpu(env);
>       RISCVCPU *cpu = RISCV_CPU(cs);
>       int ctr_index;
> +    target_ulong ctr_mask;
>       int base_csrno = CSR_CYCLE;
>       bool rv32 = riscv_cpu_mxl(env) == MXL_RV32 ? true : false;
>
> @@ -82,122 +83,31 @@ static RISCVException ctr(CPURISCVState
*env, int csrno)
>           base_csrno += 0x80;
>       }
>       ctr_index = csrno - base_csrno;
> +    ctr_mask = BIT(ctr_index);
>
>       if ((csrno >= CSR_CYCLE && csrno <= CSR_INSTRET) ||
>           (csrno >= CSR_CYCLEH && csrno <= CSR_INSTRETH)) {
>           goto skip_ext_pmu_check;
>       }
>
> -    if ((!cpu->cfg.pmu_num || !(cpu->pmu_avail_ctrs &
BIT(ctr_index {
> +    if (!(cpu->pmu_avail_ctrs & ctr_mask)) {
>           /* No counter is enabled in PMU or the counter is out
of range */
>           return RISCV_EXCP_ILLEGAL_INST;
>       }
>
>   skip_ext_pmu_check:
>
> -    if (env->priv == PRV_S) {
> -        switch (csrno) {
> -        case CSR_CYCLE:
> -            if (!get_field(env->mcounteren, COUNTEREN_CY)) {
> -                return RISCV_EXCP_ILLEGAL_INST;
> -            }
> -            break;
> -        case CSR_TIME:
> -            if (!get_field(env->mcounteren, COUNTEREN_TM)) {
> -                return RISCV_EXCP_ILLEGAL_INST;
> -            }
> -            break;
> -        case CSR_INSTRET:
> -            if (!get_field(env->mcounteren, COUNTEREN_IR)) {
> -                return RISCV_EXCP_ILLEGAL_INST;
> -            }
> -            break;
> -        case CSR_HPMCOUNTER3...CSR_HPMCOUNTER31:
> -            if (!get_field(env->mcounteren, 1 << ctr_index)) {
> -                return RISCV_EXCP_ILLEGAL_INST;
> -            }
> -            break;
> -        }
> -        if (rv32) {
> -            switch (csrno) {
> -            case CSR_CYCLEH:
> -                if (!get_field(env->mcounteren, COUNTEREN_CY)) {
> -                    return RISCV_EXCP_ILLEGAL_INST;
> -                }
> -                break;
> -            case CSR_TIMEH:
> -                if (!get_field(env->mcounteren, COUNTEREN_TM)) {
> -                    return RISCV_EXCP_ILLEGAL_INST;
> -                }
> -                break;
> -            case CSR_INSTRETH:
> -                if (!get_field(env->mcounteren, COUNTEREN_IR)) {
> -                    return RISCV_EXCP_ILLEGAL_INST;
> -                }
> -                break;
> -            case CSR_HPMCOUNTER3H...CSR_HPMCOUNTER31H:
> -                if (!get_field(env->mcounteren, 1 << ctr_index)) {
> -                    return RISCV_EXCP_ILLEGAL_INST;
> -                }
> -                break;
> -            }
> -        }
> +    if (((env->priv == PRV_S) && (!get_field(env->mcounteren,
ctr_mask))) ||
> +       ((env->priv == PRV_U) && (!get_field(env->scounteren,
ctr_mask {
> +        return RISCV_EXCP_ILLEGAL_INST;
>       }
>
>       if (riscv_cpu_virt_enabled(env)) {
> -        switch (csrno) {
> -        case CSR_CYCLE:
> -            if (!get_field(env->hcounteren, COUNTEREN_CY) &&
> -                get_field(env->mcounteren, COUNTEREN_CY)) {
> -                return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
> -            }
> -            break;
> -        case CSR_TIME:
> -            if (!get_field(env->hcounteren, COUNTEREN_TM) &&
> -                get_field(env->mcounteren, COUNTEREN_TM)) {
> -                return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
> -            }
> -            break;
> -        case CSR_INSTRET:
> -            if (!get_field(env->hcounteren, 

Re: [PATCH v11 5/6] target/riscv: Update the privilege field for sscofpmf CSRs

2022-07-27 Thread Atish Kumar Patra
On Wed, Jul 27, 2022 at 1:27 AM Weiwei Li  wrote:

>
> 在 2022/7/27 下午2:49, Atish Patra 写道:
> > The sscofpmf extension was ratified as a part of priv spec v1.12.
> > Mark the csr_ops accordingly.
> >
> > Reviewed-by: Alistair Francis 
> > Signed-off-by: Atish Patra 
> > ---
> >   target/riscv/csr.c | 90 ++
> >   1 file changed, 60 insertions(+), 30 deletions(-)
> >
> > diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> > index 57dbbf9b09a0..ec6d7f022ad5 100644
> > --- a/target/riscv/csr.c
> > +++ b/target/riscv/csr.c
> > @@ -3859,63 +3859,92 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
> >  write_mhpmevent
> },
> >
> >   [CSR_MHPMEVENT3H]= { "mhpmevent3h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
> write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
>
> Similar to the first commit, it's better to align with the first element
> "mhpmevent3h" .Otherwise,
>
>
Fixed it. Thanks for the review.


> Reviewed-by: Weiwei Li 
>
> Regards,
>
> Weiwei Li
>
> >   [CSR_MHPMEVENT4H]= { "mhpmevent4h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
> write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT5H]= { "mhpmevent5h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
> write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT6H]= { "mhpmevent6h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
> write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT7H]= { "mhpmevent7h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
> write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT8H]= { "mhpmevent8h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
> write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT9H]= { "mhpmevent9h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
> write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT10H]   = { "mhpmevent10h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
>  write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT11H]   = { "mhpmevent11h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
>  write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT12H]   = { "mhpmevent12h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
>  write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT13H]   = { "mhpmevent13h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
>  write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT14H]   = { "mhpmevent14h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
>  write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT15H]   = { "mhpmevent15h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
>  write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT16H]   = { "mhpmevent16h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
>  write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT17H]   = { "mhpmevent17h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
>  write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT18H]   = { "mhpmevent18h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
>  write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT19H]   = { "mhpmevent19h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
>  write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT20H]   = { "mhpmevent20h",sscofpmf,
> read_mhpmeventh,
> > -
>  write_mhpmeventh},
> > +
>  write_mhpmeventh,
> > + .min_priv_ver =
> PRIV_VERSION_1_12_0 },
> >   [CSR_MHPMEVENT21H]   

Re: add qemu_fdt_setprop_strings() and use it in most places

2022-07-27 Thread Ben Dooks
On Wed, Jul 27, 2022 at 11:39:00PM +0100, Ben Dooks wrote:
> Add a helper for qemu_fdt_setprop_strings() to take a set of strings
> to put into a device-tree, which removes several open-coded methods
> such as setting an char arr[] = {..} or setting char val[] = "str\0str2";
> 
> This is for hw/arm, hw/mips and hw/riscv as well as a couple of cores. It
> is not fully tested over all of those, I may not have caught all places
> this is to be replaced.

I've added a branch at
 https://github.com/bendooks/qemu.git dev/fdt-string-array

-- 
Ben Dooks, b...@fluff.org, http://www.fluff.org/ben/

Large Hadron Colada: A large Pina Colada that makes the universe disappear.




[PATCH v3 4/5] hw/mips: use qemu_fdt_setprop_strings()

2022-07-27 Thread Ben Dooks
Change to using qemu_fdt_setprop_strings() helper in hw/mips.

Signed-off-by: Ben Dooks 
---
 hw/mips/boston.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/hw/mips/boston.c b/hw/mips/boston.c
index d2ab9da1a0..759f6daafe 100644
--- a/hw/mips/boston.c
+++ b/hw/mips/boston.c
@@ -515,9 +515,6 @@ static const void *create_fdt(BostonState *s,
 MachineState *mc = s->mach;
 uint32_t platreg_ph, gic_ph, clk_ph;
 char *name, *gic_name, *platreg_name, *stdout_name;
-static const char * const syscon_compat[2] = {
-"img,boston-platform-regs", "syscon"
-};
 
 fdt = create_device_tree(dt_size);
 if (!fdt) {
@@ -608,9 +605,8 @@ static const void *create_fdt(BostonState *s,
 platreg_name = g_strdup_printf("/soc/system-controller@%" HWADDR_PRIx,
memmap[BOSTON_PLATREG].base);
 qemu_fdt_add_subnode(fdt, platreg_name);
-qemu_fdt_setprop_string_array(fdt, platreg_name, "compatible",
- (char **)_compat,
- ARRAY_SIZE(syscon_compat));
+qemu_fdt_setprop_strings(fdt, platreg_name, "compatible",
+ "img,boston-platform-regs", "syscon");
 qemu_fdt_setprop_cells(fdt, platreg_name, "reg",
memmap[BOSTON_PLATREG].base,
memmap[BOSTON_PLATREG].size);
-- 
2.35.1




[PATCH v3 2/5] hw/riscv: use qemu_fdt_setprop_strings() for string arrays

2022-07-27 Thread Ben Dooks
Use the qemu_fdt_setprop_strings() in sifve_u.c to simplify the code.

Signed-off-by: Ben Dooks 
---
 hw/riscv/sifive_u.c | 18 +-
 hw/riscv/spike.c|  7 ++-
 hw/riscv/virt.c | 32 
 3 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
index e4c814a3ea..dc112a253a 100644
--- a/hw/riscv/sifive_u.c
+++ b/hw/riscv/sifive_u.c
@@ -103,13 +103,6 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry 
*memmap,
 char *nodename;
 uint32_t plic_phandle, prci_phandle, gpio_phandle, phandle = 1;
 uint32_t hfclk_phandle, rtcclk_phandle, phy_phandle;
-static const char * const ethclk_names[2] = { "pclk", "hclk" };
-static const char * const clint_compat[2] = {
-"sifive,clint0", "riscv,clint0"
-};
-static const char * const plic_compat[2] = {
-"sifive,plic-1.0.0", "riscv,plic0"
-};
 
 if (ms->dtb) {
 fdt = s->fdt = load_device_tree(ms->dtb, >fdt_size);
@@ -221,11 +214,11 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry 
*memmap,
 nodename = g_strdup_printf("/soc/clint@%lx",
 (long)memmap[SIFIVE_U_DEV_CLINT].base);
 qemu_fdt_add_subnode(fdt, nodename);
-qemu_fdt_setprop_string_array(fdt, nodename, "compatible",
-(char **)_compat, ARRAY_SIZE(clint_compat));
 qemu_fdt_setprop_cells(fdt, nodename, "reg",
 0x0, memmap[SIFIVE_U_DEV_CLINT].base,
 0x0, memmap[SIFIVE_U_DEV_CLINT].size);
+qemu_fdt_setprop_strings(fdt, nodename, "compatible",
+ "sifive,clint0", "riscv,clint0");
 qemu_fdt_setprop(fdt, nodename, "interrupts-extended",
 cells, ms->smp.cpus * sizeof(uint32_t) * 4);
 g_free(cells);
@@ -279,8 +272,8 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry 
*memmap,
 (long)memmap[SIFIVE_U_DEV_PLIC].base);
 qemu_fdt_add_subnode(fdt, nodename);
 qemu_fdt_setprop_cell(fdt, nodename, "#interrupt-cells", 1);
-qemu_fdt_setprop_string_array(fdt, nodename, "compatible",
-(char **)_compat, ARRAY_SIZE(plic_compat));
+qemu_fdt_setprop_strings(fdt, nodename, "compatbile",
+ "sifive,plic-1.0.0", "riscv,plic0");
 qemu_fdt_setprop(fdt, nodename, "interrupt-controller", NULL, 0);
 qemu_fdt_setprop(fdt, nodename, "interrupts-extended",
 cells, (ms->smp.cpus * 4 - 2) * sizeof(uint32_t));
@@ -426,8 +419,7 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry 
*memmap,
 qemu_fdt_setprop_cell(fdt, nodename, "interrupts", SIFIVE_U_GEM_IRQ);
 qemu_fdt_setprop_cells(fdt, nodename, "clocks",
 prci_phandle, PRCI_CLK_GEMGXLPLL, prci_phandle, PRCI_CLK_GEMGXLPLL);
-qemu_fdt_setprop_string_array(fdt, nodename, "clock-names",
-(char **)_names, ARRAY_SIZE(ethclk_names));
+qemu_fdt_setprop_strings(fdt, nodename, "clock-names", "pclk", "hclk");
 qemu_fdt_setprop(fdt, nodename, "local-mac-address",
 s->soc.gem.conf.macaddr.a, ETH_ALEN);
 qemu_fdt_setprop_cell(fdt, nodename, "#address-cells", 1);
diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
index e41b6aa9f0..aa895779cd 100644
--- a/hw/riscv/spike.c
+++ b/hw/riscv/spike.c
@@ -59,9 +59,6 @@ static void create_fdt(SpikeState *s, const MemMapEntry 
*memmap,
 uint32_t cpu_phandle, intc_phandle, phandle = 1;
 char *name, *mem_name, *clint_name, *clust_name;
 char *core_name, *cpu_name, *intc_name;
-static const char * const clint_compat[2] = {
-"sifive,clint0", "riscv,clint0"
-};
 
 fdt = s->fdt = create_device_tree(>fdt_size);
 if (!fdt) {
@@ -159,8 +156,8 @@ static void create_fdt(SpikeState *s, const MemMapEntry 
*memmap,
 (memmap[SPIKE_CLINT].size * socket);
 clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr);
 qemu_fdt_add_subnode(fdt, clint_name);
-qemu_fdt_setprop_string_array(fdt, clint_name, "compatible",
-(char **)_compat, ARRAY_SIZE(clint_compat));
+qemu_fdt_setprop_strings(fdt, clint_name, "compatible",
+ "sifive,clint0", "riscv,clint0");
 qemu_fdt_setprop_cells(fdt, clint_name, "reg",
 0x0, clint_addr, 0x0, memmap[SPIKE_CLINT].size);
 qemu_fdt_setprop(fdt, clint_name, "interrupts-extended",
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index bc424dd2f5..c6aaa611a6 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -261,11 +261,8 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int 
socket,
 intc_phandles[cpu]);
 if (riscv_feature(>soc[socket].harts[cpu].env,
   RISCV_FEATURE_AIA)) {
-static const char * const compat[2] = {
-"riscv,cpu-intc-aia", "riscv,cpu-intc"
-};
-qemu_fdt_setprop_string_array(mc->fdt, intc_name, "compatible",
-  (char **), ARRAY_SIZE(compat));
+

add qemu_fdt_setprop_strings() and use it in most places

2022-07-27 Thread Ben Dooks
Add a helper for qemu_fdt_setprop_strings() to take a set of strings
to put into a device-tree, which removes several open-coded methods
such as setting an char arr[] = {..} or setting char val[] = "str\0str2";

This is for hw/arm, hw/mips and hw/riscv as well as a couple of cores. It
is not fully tested over all of those, I may not have caught all places
this is to be replaced.





[PATCH v3 1/5] device_tree: add qemu_fdt_setprop_strings() helper

2022-07-27 Thread Ben Dooks
Add a helper to set a property from a set of strings
to reduce the following code:

static const char * const clint_compat[2] = {
"sifive,clint0", "riscv,clint0"
};

qemu_fdt_setprop_string_array(fdt, nodename, "compatible",
(char **)_compat, ARRAY_SIZE(clint_compat));

Signed-off-by: Ben Dooks 
---
v3;
 - fix return value for the call
 - add better help text
v2:
 - fix node/path in comment
---
 include/sysemu/device_tree.h | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/include/sysemu/device_tree.h b/include/sysemu/device_tree.h
index ef060a9759..83bdfe390e 100644
--- a/include/sysemu/device_tree.h
+++ b/include/sysemu/device_tree.h
@@ -87,6 +87,25 @@ int qemu_fdt_setprop_string(void *fdt, const char *node_path,
 int qemu_fdt_setprop_string_array(void *fdt, const char *node_path,
   const char *prop, char **array, int len);
 
+/**
+ * qemu_fdt_setprop_strings: set a property from a set of strings
+ *
+ * @fdt: pointer to the dt blob
+ * @path: node name
+ * @prop: property array
+ *
+ * This is a helper for the qemu_fdt_setprop_string_array() function
+ * which takes a va-arg set of strings instead of having to setup a
+ * single use string array.
+ */
+#define qemu_fdt_setprop_strings(fdt, path, prop, ...)  \
+({ int __ret; do {  \
+static const char * const __strs[] = { __VA_ARGS__ };   \
+__ret = qemu_fdt_setprop_string_array(fdt, path, prop,  \
+(char **)&__strs, ARRAY_SIZE(__strs));  \
+ } while(0); __ret; })
+
+
 int qemu_fdt_setprop_phandle(void *fdt, const char *node_path,
  const char *property,
  const char *target_node_path);
-- 
2.35.1




[PATCH v3 5/5] hw/arm: change to use qemu_fdt_setprop_strings()

2022-07-27 Thread Ben Dooks
Change to using qemu_fdt_setprop_strings() instead of using
\0 separated string arrays.

Signed-off-by: Ben Dooks 
---
 hw/arm/boot.c |  8 +++---
 hw/arm/virt.c | 28 +
 hw/arm/xlnx-versal-virt.c | 51 ---
 3 files changed, 37 insertions(+), 50 deletions(-)

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index ada2717f76..bf29b7ae60 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -490,11 +490,11 @@ static void fdt_add_psci_node(void *fdt)
 qemu_fdt_add_subnode(fdt, "/psci");
 if (armcpu->psci_version >= QEMU_PSCI_VERSION_0_2) {
 if (armcpu->psci_version < QEMU_PSCI_VERSION_1_0) {
-const char comp[] = "arm,psci-0.2\0arm,psci";
-qemu_fdt_setprop(fdt, "/psci", "compatible", comp, sizeof(comp));
+qemu_fdt_setprop_strings(fdt, "/psci", "compatible",
+ "arm,psci-0.2", "arm,psci"); 
 } else {
-const char comp[] = "arm,psci-1.0\0arm,psci-0.2\0arm,psci";
-qemu_fdt_setprop(fdt, "/psci", "compatible", comp, sizeof(comp));
+qemu_fdt_setprop_strings(fdt, "/psci", "compatible",
+ "arm,psci-1.0", "arm,psci-0.2", 
"arm,psci");
 }
 
 cpu_off_fn = QEMU_PSCI_0_2_FN_CPU_OFF;
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 9633f822f3..2670c13ef1 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -343,9 +343,8 @@ static void fdt_add_timer_nodes(const VirtMachineState *vms)
 
 armcpu = ARM_CPU(qemu_get_cpu(0));
 if (arm_feature(>env, ARM_FEATURE_V8)) {
-const char compat[] = "arm,armv8-timer\0arm,armv7-timer";
-qemu_fdt_setprop(ms->fdt, "/timer", "compatible",
- compat, sizeof(compat));
+qemu_fdt_setprop_strings(ms->fdt, "/timer", "compatible",
+ "arm,armv8-timer", "arm,armv7-timer");
 } else {
 qemu_fdt_setprop_string(ms->fdt, "/timer", "compatible",
 "arm,armv7-timer");
@@ -843,8 +842,6 @@ static void create_uart(const VirtMachineState *vms, int 
uart,
 hwaddr base = vms->memmap[uart].base;
 hwaddr size = vms->memmap[uart].size;
 int irq = vms->irqmap[uart];
-const char compat[] = "arm,pl011\0arm,primecell";
-const char clocknames[] = "uartclk\0apb_pclk";
 DeviceState *dev = qdev_new(TYPE_PL011);
 SysBusDevice *s = SYS_BUS_DEVICE(dev);
 MachineState *ms = MACHINE(vms);
@@ -858,8 +855,8 @@ static void create_uart(const VirtMachineState *vms, int 
uart,
 nodename = g_strdup_printf("/pl011@%" PRIx64, base);
 qemu_fdt_add_subnode(ms->fdt, nodename);
 /* Note that we can't use setprop_string because of the embedded NUL */
-qemu_fdt_setprop(ms->fdt, nodename, "compatible",
- compat, sizeof(compat));
+qemu_fdt_setprop_strings(ms->fdt, nodename, "compatible",
+ "arm,pl011", "arm,primecell");
 qemu_fdt_setprop_sized_cells(ms->fdt, nodename, "reg",
  2, base, 2, size);
 qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts",
@@ -867,8 +864,8 @@ static void create_uart(const VirtMachineState *vms, int 
uart,
GIC_FDT_IRQ_FLAGS_LEVEL_HI);
 qemu_fdt_setprop_cells(ms->fdt, nodename, "clocks",
vms->clock_phandle, vms->clock_phandle);
-qemu_fdt_setprop(ms->fdt, nodename, "clock-names",
- clocknames, sizeof(clocknames));
+qemu_fdt_setprop_strings(ms->fdt, nodename, "clock-names",
+ "uartclk", "apb_pclk");
 
 if (uart == VIRT_UART) {
 qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", nodename);
@@ -890,14 +887,14 @@ static void create_rtc(const VirtMachineState *vms)
 hwaddr base = vms->memmap[VIRT_RTC].base;
 hwaddr size = vms->memmap[VIRT_RTC].size;
 int irq = vms->irqmap[VIRT_RTC];
-const char compat[] = "arm,pl031\0arm,primecell";
 MachineState *ms = MACHINE(vms);
 
 sysbus_create_simple("pl031", base, qdev_get_gpio_in(vms->gic, irq));
 
 nodename = g_strdup_printf("/pl031@%" PRIx64, base);
 qemu_fdt_add_subnode(ms->fdt, nodename);
-qemu_fdt_setprop(ms->fdt, nodename, "compatible", compat, sizeof(compat));
+qemu_fdt_setprop_strings(ms->fdt, nodename, "compatible",
+ "arm,pl031", "arm,primecell");
 qemu_fdt_setprop_sized_cells(ms->fdt, nodename, "reg",
  2, base, 2, size);
 qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts",
@@ -983,7 +980,6 @@ static void create_gpio_devices(const VirtMachineState 
*vms, int gpio,
 hwaddr base = vms->memmap[gpio].base;
 hwaddr size = vms->memmap[gpio].size;
 int irq = vms->irqmap[gpio];
-const char compat[] = "arm,pl061\0arm,primecell";
 SysBusDevice *s;
 MachineState *ms = MACHINE(vms);
 
@@ 

[PATCH v3 3/5] hw/core: use qemu_fdt_setprop_strings()

2022-07-27 Thread Ben Dooks
Change to using the qemu_fdt_setprop_strings() helper in
hw/core code.

Signed-off-by: Ben Dooks 
---
 hw/core/guest-loader.c | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/hw/core/guest-loader.c b/hw/core/guest-loader.c
index 391c875a29..203090503e 100644
--- a/hw/core/guest-loader.c
+++ b/hw/core/guest-loader.c
@@ -56,10 +56,8 @@ static void loader_insert_platform_data(GuestLoaderState *s, 
int size,
 qemu_fdt_setprop(fdt, node, "reg", _attr, sizeof(reg_attr));
 
 if (s->kernel) {
-const char *compat[2] = { "multiboot,module", "multiboot,kernel" };
-if (qemu_fdt_setprop_string_array(fdt, node, "compatible",
-  (char **) ,
-  ARRAY_SIZE(compat)) < 0) {
+if (qemu_fdt_setprop_strings(fdt, node, "compatible",
+ "multiboot,module", "multiboot,kernel") < 
0) {
 error_setg(errp, "couldn't set %s/compatible", node);
 return;
 }
@@ -69,10 +67,8 @@ static void loader_insert_platform_data(GuestLoaderState *s, 
int size,
 }
 }
 } else if (s->initrd) {
-const char *compat[2] = { "multiboot,module", "multiboot,ramdisk" };
-if (qemu_fdt_setprop_string_array(fdt, node, "compatible",
-  (char **) ,
-  ARRAY_SIZE(compat)) < 0) {
+if (qemu_fdt_setprop_strings(fdt, node, "compatible",
+ "multiboot,module", "multiboot,ramdisk") 
< 0) {
 error_setg(errp, "couldn't set %s/compatible", node);
 return;
 }
-- 
2.35.1




Re: [PATCH v11 2/6] target/riscv: Simplify counter predicate function

2022-07-27 Thread Atish Kumar Patra
On Wed, Jul 27, 2022 at 1:35 AM Weiwei Li  wrote:

>
> 在 2022/7/27 下午2:49, Atish Patra 写道:
> > All the hpmcounters and the fixed counters (CY, IR, TM) can be
> represented
> > as a unified counter. Thus, the predicate function doesn't need handle
> each
> > case separately.
> >
> > Simplify the predicate function so that we just handle things differently
> > between RV32/RV64 and S/HS mode.
> >
> > Reviewed-by: Bin Meng 
> > Acked-by: Alistair Francis 
> > Signed-off-by: Atish Patra 
> > ---
> >   target/riscv/csr.c | 112 +
> >   1 file changed, 11 insertions(+), 101 deletions(-)
> >
> > diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> > index 1233bfa0a726..57dbbf9b09a0 100644
> > --- a/target/riscv/csr.c
> > +++ b/target/riscv/csr.c
> > @@ -74,6 +74,7 @@ static RISCVException ctr(CPURISCVState *env, int
> csrno)
> >   CPUState *cs = env_cpu(env);
> >   RISCVCPU *cpu = RISCV_CPU(cs);
> >   int ctr_index;
> > +target_ulong ctr_mask;
> >   int base_csrno = CSR_CYCLE;
> >   bool rv32 = riscv_cpu_mxl(env) == MXL_RV32 ? true : false;
> >
> > @@ -82,122 +83,31 @@ static RISCVException ctr(CPURISCVState *env, int
> csrno)
> >   base_csrno += 0x80;
> >   }
> >   ctr_index = csrno - base_csrno;
> > +ctr_mask = BIT(ctr_index);
> >
> >   if ((csrno >= CSR_CYCLE && csrno <= CSR_INSTRET) ||
> >   (csrno >= CSR_CYCLEH && csrno <= CSR_INSTRETH)) {
> >   goto skip_ext_pmu_check;
> >   }
> >
> > -if ((!cpu->cfg.pmu_num || !(cpu->pmu_avail_ctrs & BIT(ctr_index
> {
> > +if (!(cpu->pmu_avail_ctrs & ctr_mask)) {
> >   /* No counter is enabled in PMU or the counter is out of range
> */
> >   return RISCV_EXCP_ILLEGAL_INST;
> >   }
> >
> >   skip_ext_pmu_check:
> >
> > -if (env->priv == PRV_S) {
> > -switch (csrno) {
> > -case CSR_CYCLE:
> > -if (!get_field(env->mcounteren, COUNTEREN_CY)) {
> > -return RISCV_EXCP_ILLEGAL_INST;
> > -}
> > -break;
> > -case CSR_TIME:
> > -if (!get_field(env->mcounteren, COUNTEREN_TM)) {
> > -return RISCV_EXCP_ILLEGAL_INST;
> > -}
> > -break;
> > -case CSR_INSTRET:
> > -if (!get_field(env->mcounteren, COUNTEREN_IR)) {
> > -return RISCV_EXCP_ILLEGAL_INST;
> > -}
> > -break;
> > -case CSR_HPMCOUNTER3...CSR_HPMCOUNTER31:
> > -if (!get_field(env->mcounteren, 1 << ctr_index)) {
> > -return RISCV_EXCP_ILLEGAL_INST;
> > -}
> > -break;
> > -}
> > -if (rv32) {
> > -switch (csrno) {
> > -case CSR_CYCLEH:
> > -if (!get_field(env->mcounteren, COUNTEREN_CY)) {
> > -return RISCV_EXCP_ILLEGAL_INST;
> > -}
> > -break;
> > -case CSR_TIMEH:
> > -if (!get_field(env->mcounteren, COUNTEREN_TM)) {
> > -return RISCV_EXCP_ILLEGAL_INST;
> > -}
> > -break;
> > -case CSR_INSTRETH:
> > -if (!get_field(env->mcounteren, COUNTEREN_IR)) {
> > -return RISCV_EXCP_ILLEGAL_INST;
> > -}
> > -break;
> > -case CSR_HPMCOUNTER3H...CSR_HPMCOUNTER31H:
> > -if (!get_field(env->mcounteren, 1 << ctr_index)) {
> > -return RISCV_EXCP_ILLEGAL_INST;
> > -}
> > -break;
> > -}
> > -}
> > +if (((env->priv == PRV_S) && (!get_field(env->mcounteren,
> ctr_mask))) ||
> > +   ((env->priv == PRV_U) && (!get_field(env->scounteren,
> ctr_mask {
> > +return RISCV_EXCP_ILLEGAL_INST;
> >   }
> >
> >   if (riscv_cpu_virt_enabled(env)) {
> > -switch (csrno) {
> > -case CSR_CYCLE:
> > -if (!get_field(env->hcounteren, COUNTEREN_CY) &&
> > -get_field(env->mcounteren, COUNTEREN_CY)) {
> > -return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
> > -}
> > -break;
> > -case CSR_TIME:
> > -if (!get_field(env->hcounteren, COUNTEREN_TM) &&
> > -get_field(env->mcounteren, COUNTEREN_TM)) {
> > -return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
> > -}
> > -break;
> > -case CSR_INSTRET:
> > -if (!get_field(env->hcounteren, COUNTEREN_IR) &&
> > -get_field(env->mcounteren, COUNTEREN_IR)) {
> > -return RISCV_EXCP_VIRT_INSTRUCTION_FAULT;
> > -}
> > -break;
> > -case CSR_HPMCOUNTER3...CSR_HPMCOUNTER31:
> > -if (!get_field(env->hcounteren, 1 << ctr_index) &&
> > - get_field(env->mcounteren, 1 << ctr_index)) {
> > -return 

Re: [PATCH v11 1/6] target/riscv: Add sscofpmf extension support

2022-07-27 Thread Atish Kumar Patra
On Wed, Jul 27, 2022 at 1:11 AM Weiwei Li  wrote:

>
> 在 2022/7/27 下午2:49, Atish Patra 写道:
> > The Sscofpmf ('Ss' for Privileged arch and Supervisor-level extensions,
> > and 'cofpmf' for Count OverFlow and Privilege Mode Filtering)
> > extension allows the perf to handle overflow interrupts and filtering
> > support. This patch provides a framework for programmable
> > counters to leverage the extension. As the extension doesn't have any
> > provision for the overflow bit for fixed counters, the fixed events
> > can also be monitoring using programmable counters. The underlying
> > counters for cycle and instruction counters are always running. Thus,
> > a separate timer device is programmed to handle the overflow.
> >
> > Tested-by: Heiko Stuebner 
> > Signed-off-by: Atish Patra 
> > Signed-off-by: Atish Patra 
> > ---
> >   target/riscv/cpu.c  |  11 ++
> >   target/riscv/cpu.h  |  25 +++
> >   target/riscv/cpu_bits.h |  55 +++
> >   target/riscv/csr.c  | 166 ++-
> >   target/riscv/machine.c  |   1 +
> >   target/riscv/pmu.c  | 357 +++-
> >   target/riscv/pmu.h  |   7 +
> >   7 files changed, 611 insertions(+), 11 deletions(-)
> >
> > diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> > index 1bb3973806d2..c1d62b81a725 100644
> > --- a/target/riscv/cpu.c
> > +++ b/target/riscv/cpu.c
> > @@ -22,6 +22,7 @@
> >   #include "qemu/ctype.h"
> >   #include "qemu/log.h"
> >   #include "cpu.h"
> > +#include "pmu.h"
> >   #include "internals.h"
> >   #include "exec/exec-all.h"
> >   #include "qapi/error.h"
> > @@ -779,6 +780,15 @@ static void riscv_cpu_realize(DeviceState *dev,
> Error **errp)
> >   set_misa(env, env->misa_mxl, ext);
> >   }
> >
> > +#ifndef CONFIG_USER_ONLY
> > +if (cpu->cfg.pmu_num) {
> > +if (!riscv_pmu_init(cpu, cpu->cfg.pmu_num) &&
> cpu->cfg.ext_sscofpmf) {
> > +cpu->pmu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
> > +  riscv_pmu_timer_cb, cpu);
> > +}
> > + }
> > +#endif
> > +
> >   riscv_cpu_register_gdb_regs_for_features(cs);
> >
> >   qemu_init_vcpu(cs);
> > @@ -883,6 +893,7 @@ static Property riscv_cpu_extensions[] = {
> >   DEFINE_PROP_BOOL("v", RISCVCPU, cfg.ext_v, false),
> >   DEFINE_PROP_BOOL("h", RISCVCPU, cfg.ext_h, true),
> >   DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16),
> > +DEFINE_PROP_BOOL("sscofpmf", RISCVCPU, cfg.ext_sscofpmf, false),
> >   DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
> >   DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
> >   DEFINE_PROP_BOOL("Zfh", RISCVCPU, cfg.ext_zfh, false),
> > diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> > index 5c7acc055ac9..db193c3d 100644
> > --- a/target/riscv/cpu.h
> > +++ b/target/riscv/cpu.h
> > @@ -137,6 +137,8 @@ typedef struct PMUCTRState {
> >   /* Snapshort value of a counter in RV32 */
> >   target_ulong mhpmcounterh_prev;
> >   bool started;
> > +/* Value beyond UINT32_MAX/UINT64_MAX before overflow interrupt
> trigger */
> > +target_ulong irq_overflow_left;
> >   } PMUCTRState;
> >
> >   struct CPUArchState {
> > @@ -297,6 +299,9 @@ struct CPUArchState {
> >   /* PMU event selector configured values. First three are unused*/
> >   target_ulong mhpmevent_val[RV_MAX_MHPMEVENTS];
> >
> > +/* PMU event selector configured values for RV32*/
> > +target_ulong mhpmeventh_val[RV_MAX_MHPMEVENTS];
> > +
> >   target_ulong sscratch;
> >   target_ulong mscratch;
> >
> > @@ -433,6 +438,7 @@ struct RISCVCPUConfig {
> >   bool ext_zve32f;
> >   bool ext_zve64f;
> >   bool ext_zmmul;
> > +bool ext_sscofpmf;
> >   bool rvv_ta_all_1s;
> >
> >   uint32_t mvendorid;
> > @@ -479,6 +485,12 @@ struct ArchCPU {
> >
> >   /* Configuration Settings */
> >   RISCVCPUConfig cfg;
> > +
> > +QEMUTimer *pmu_timer;
> > +/* A bitmask of Available programmable counters */
> > +uint32_t pmu_avail_ctrs;
> > +/* Mapping of events to counters */
> > +GHashTable *pmu_event_ctr_map;
> >   };
> >
> >   static inline int riscv_has_ext(CPURISCVState *env, target_ulong ext)
> > @@ -738,6 +750,19 @@ enum {
> >   CSR_TABLE_SIZE = 0x1000
> >   };
> >
> > +/**
> > + * The event id are encoded based on the encoding specified in the
> > + * SBI specification v0.3
> > + */
> > +
> > +enum riscv_pmu_event_idx {
> > +RISCV_PMU_EVENT_HW_CPU_CYCLES = 0x01,
> > +RISCV_PMU_EVENT_HW_INSTRUCTIONS = 0x02,
> > +RISCV_PMU_EVENT_CACHE_DTLB_READ_MISS = 0x10019,
> > +RISCV_PMU_EVENT_CACHE_DTLB_WRITE_MISS = 0x1001B,
> > +RISCV_PMU_EVENT_CACHE_ITLB_PREFETCH_MISS = 0x10021,
> > +};
> > +
> >   /* CSR function table */
> >   extern riscv_csr_operations csr_ops[CSR_TABLE_SIZE];
> >
> > diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> > index 6be5a9e9f046..b63c586be563 100644
> > --- 

Re: [RFC PATCH 8/8] tests/docker: Selective line reading by python script

2022-07-27 Thread Philippe Mathieu-Daudé via
+Erik/Daniel

On Wed, Jul 27, 2022 at 6:37 PM Lucas Mateus Castro(alqotel)
 wrote:
>
> Building some images failed on ppc64le because the dockerfile tried to
> install some packages that are only available in x86 and arm64, to solve
> this while still having those packages be available in those architectures
> a comment was put before the installation command to instruct the python
> script into ignoring those lines for some architectures (in this case
> ppc64le)
>
> Overall I'm not a big fan of the way I solved this problem, so I'd like
> to know if anyone has a better way to make these dockerfilse work in
> PPC64LE.
>
> For context the base images used here are available in PPC64LE but some
> of the packages installed are not (in alpine's case it's XEN, which is
> only available to x86 and ARM), so this patch create a ignore_list which
> is set on a per-architecture basis, and any packages in a dockerfile in
> this ignore_list will not be copied to the temporary dockerfile used in
> the docker command.

Shouldn't this be done on lcitool side?
(https://gitlab.com/libvirt/libvirt-ci/-/tree/master/lcitool)

> Signed-off-by: Lucas Mateus Castro(alqotel) 
> ---
>  tests/docker/docker.py | 15 ---
>  tests/docker/dockerfiles/alpine.docker |  2 ++
>  2 files changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/tests/docker/docker.py b/tests/docker/docker.py
> index d0af2861b8..9b962d1c78 100755
> --- a/tests/docker/docker.py
> +++ b/tests/docker/docker.py
> @@ -14,6 +14,7 @@
>  import os
>  import sys
>  import subprocess
> +import platform
>  import json
>  import hashlib
>  import atexit
> @@ -207,8 +208,15 @@ def _read_qemu_dockerfile(img_name):
>
>  def _dockerfile_preprocess(df):
>  out = ""
> +ignore_list = []
>  for l in df.splitlines():
> -if len(l.strip()) == 0 or l.startswith("#"):
> +if len(l.strip()) == 0:
> +continue
> +if l.startswith("#"):
> +if len(l.split()) >= 3:
> +if l.split()[1] == "ignore":
> +if platform.processor() in l.split()[2].split(','):
> +ignore_list += l.split()[3].split(',')
>  continue
>  from_pref = "FROM qemu/"
>  if l.startswith(from_pref):
> @@ -219,7 +227,8 @@ def _dockerfile_preprocess(df):
>  inlining = _read_qemu_dockerfile(l[len(from_pref):])
>  out += _dockerfile_preprocess(inlining)
>  continue
> -out += l + "\n"
> +if not any(x in l.split() for x in ignore_list):
> +out += l + "\n"
>  return out
>
>
> @@ -330,7 +339,7 @@ def build_image(self, tag, docker_dir, dockerfile,
>  tmp_df = tempfile.NamedTemporaryFile(mode="w+t",
>   encoding='utf-8',
>   dir=docker_dir, 
> suffix=".docker")
> -tmp_df.write(dockerfile)
> +tmp_df.write(_dockerfile_preprocess(dockerfile))
>
>  if user:
>  uid = os.getuid()
> diff --git a/tests/docker/dockerfiles/alpine.docker 
> b/tests/docker/dockerfiles/alpine.docker
> index 2943a99730..5cec46d8f2 100644
> --- a/tests/docker/dockerfiles/alpine.docker
> +++ b/tests/docker/dockerfiles/alpine.docker
> @@ -6,6 +6,8 @@
>
>  FROM docker.io/library/alpine:edge
>
> +# Lines to by ignored when this file is read by the python script
> +# ignore ppc64le,ppc64 xen-dev
>  RUN apk update && \
>  apk upgrade && \
>  apk add \
> --
> 2.25.1
>



Re: When create a new qemu fork, can not run pipeline, what I need to do?

2022-07-27 Thread Philippe Mathieu-Daudé via
Cc'ing build and test automation maintainers.

On Wed, Jul 27, 2022 at 1:22 PM 罗勇刚(Yonggang Luo)  wrote:
>
> ···
> Pipeline cannot be run.
>
> No stages / jobs for this pipeline.
>
> The form contains the following warning:
>
> 121 warnings found: showing first 25
>
> jobs:amd64-centos8-container may allow multiple pipelines to run for a single 
> action due to `rules:when` clause with no `workflow:rules` - read more: 
> https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:amd64-fedora-container may allow multiple pipelines to run for a single 
> action due to `rules:when` clause with no `workflow:rules` - read more: 
> https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:amd64-debian10-container may allow multiple pipelines to run for a 
> single action due to `rules:when` clause with no `workflow:rules` - read 
> more: https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:alpha-debian-cross-container may allow multiple pipelines to run for a 
> single action due to `rules:when` clause with no `workflow:rules` - read 
> more: https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:amd64-debian-cross-container may allow multiple pipelines to run for a 
> single action due to `rules:when` clause with no `workflow:rules` - read 
> more: https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:amd64-debian-user-cross-container may allow multiple pipelines to run 
> for a single action due to `rules:when` clause with no `workflow:rules` - 
> read more: 
> https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:arm64-debian-cross-container may allow multiple pipelines to run for a 
> single action due to `rules:when` clause with no `workflow:rules` - read 
> more: https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:armel-debian-cross-container may allow multiple pipelines to run for a 
> single action due to `rules:when` clause with no `workflow:rules` - read 
> more: https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:armhf-debian-cross-container may allow multiple pipelines to run for a 
> single action due to `rules:when` clause with no `workflow:rules` - read 
> more: https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:hexagon-cross-container may allow multiple pipelines to run for a single 
> action due to `rules:when` clause with no `workflow:rules` - read more: 
> https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:hppa-debian-cross-container may allow multiple pipelines to run for a 
> single action due to `rules:when` clause with no `workflow:rules` - read 
> more: https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:m68k-debian-cross-container may allow multiple pipelines to run for a 
> single action due to `rules:when` clause with no `workflow:rules` - read 
> more: https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:mips64-debian-cross-container may allow multiple pipelines to run for a 
> single action due to `rules:when` clause with no `workflow:rules` - read 
> more: https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:mips64el-debian-cross-container may allow multiple pipelines to run for 
> a single action due to `rules:when` clause with no `workflow:rules` - read 
> more: https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:mips-debian-cross-container may allow multiple pipelines to run for a 
> single action due to `rules:when` clause with no `workflow:rules` - read 
> more: https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:mipsel-debian-cross-container may allow multiple pipelines to run for a 
> single action due to `rules:when` clause with no `workflow:rules` - read 
> more: https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:powerpc-test-cross-container may allow multiple pipelines to run for a 
> single action due to `rules:when` clause with no `workflow:rules` - read 
> more: https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:ppc64el-debian-cross-container may allow multiple pipelines to run for a 
> single action due to `rules:when` clause with no `workflow:rules` - read 
> more: https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:riscv64-debian-cross-container may allow multiple pipelines to run for a 
> single action due to `rules:when` clause with no `workflow:rules` - read 
> more: https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:riscv64-debian-test-cross-container may allow multiple pipelines to run 
> for a single action due to `rules:when` clause with no `workflow:rules` - 
> read more: 
> https://docs.gitlab.com/ee/ci/troubleshooting.html#pipeline-warnings
>
> jobs:s390x-debian-cross-container may allow 

[PATCH 0/2] linux-user: Improve strace output

2022-07-27 Thread Helge Deller
Two patches which improve the linux-user strace output:
a) Add some missing signal names
b) Add missing clock_gettime64() syscall

Helge Deller (2):
  linux-user: Add missing signals in strace output
  linux-user: Add missing clock_gettime64() syscall strace

 linux-user/signal-common.h | 46 +
 linux-user/signal.c| 37 ++---
 linux-user/strace.c| 84 +-
 linux-user/strace.list |  4 ++
 4 files changed, 117 insertions(+), 54 deletions(-)

--
2.35.3




[PATCH 1/2] linux-user: Add missing signals in strace output

2022-07-27 Thread Helge Deller
Some of the guest signal numbers are currently not converted to
their representative names in the strace output, e.g. SIGVTALRM.

This patch introduces a smart way to generate and keep in sync the
host-to-guest and guest-to-host signal conversion tables for usage in
the qemu signal and strace code. This ensures that any signals
will now show up in both tables.

There is no functional change in this patch - with the exception that yet
missing signal names now show up in the strace code too.

Signed-off-by: Helge Deller 
---
 linux-user/signal-common.h | 46 ++
 linux-user/signal.c| 37 +++---
 linux-user/strace.c| 31 +
 3 files changed, 60 insertions(+), 54 deletions(-)

diff --git a/linux-user/signal-common.h b/linux-user/signal-common.h
index 6a7e4a93fc..c2549bcd3e 100644
--- a/linux-user/signal-common.h
+++ b/linux-user/signal-common.h
@@ -118,4 +118,50 @@ static inline void finish_sigsuspend_mask(int ret)
 }
 }

+#ifdef SIGSTKFLT
+#define MAKE_SIG_ENTRY_SIGSTKFLTMAKE_SIG_ENTRY(SIGSTKFLT)
+#else
+#define MAKE_SIG_ENTRY_SIGSTKFLT
+#endif
+
+#ifdef SIGIOT
+#define MAKE_SIG_ENTRY_SIGIOT   MAKE_SIG_ENTRY(SIGIOT)
+#else
+#define MAKE_SIG_ENTRY_SIGIOT
+#endif
+
+#define MAKE_SIGNAL_LIST \
+   MAKE_SIG_ENTRY(SIGHUP) \
+   MAKE_SIG_ENTRY(SIGINT) \
+   MAKE_SIG_ENTRY(SIGQUIT) \
+   MAKE_SIG_ENTRY(SIGILL) \
+   MAKE_SIG_ENTRY(SIGTRAP) \
+   MAKE_SIG_ENTRY(SIGABRT) \
+   MAKE_SIG_ENTRY(SIGBUS) \
+   MAKE_SIG_ENTRY(SIGFPE) \
+   MAKE_SIG_ENTRY(SIGKILL) \
+   MAKE_SIG_ENTRY(SIGUSR1) \
+   MAKE_SIG_ENTRY(SIGSEGV) \
+   MAKE_SIG_ENTRY(SIGUSR2) \
+   MAKE_SIG_ENTRY(SIGPIPE) \
+   MAKE_SIG_ENTRY(SIGALRM) \
+   MAKE_SIG_ENTRY(SIGTERM) \
+   MAKE_SIG_ENTRY(SIGCHLD) \
+   MAKE_SIG_ENTRY(SIGCONT) \
+   MAKE_SIG_ENTRY(SIGSTOP) \
+   MAKE_SIG_ENTRY(SIGTSTP) \
+   MAKE_SIG_ENTRY(SIGTTIN) \
+   MAKE_SIG_ENTRY(SIGTTOU) \
+   MAKE_SIG_ENTRY(SIGURG) \
+   MAKE_SIG_ENTRY(SIGXCPU) \
+   MAKE_SIG_ENTRY(SIGXFSZ) \
+   MAKE_SIG_ENTRY(SIGVTALRM) \
+   MAKE_SIG_ENTRY(SIGPROF) \
+   MAKE_SIG_ENTRY(SIGWINCH) \
+   MAKE_SIG_ENTRY(SIGIO) \
+   MAKE_SIG_ENTRY(SIGPWR) \
+   MAKE_SIG_ENTRY(SIGSYS) \
+   MAKE_SIG_ENTRY_SIGSTKFLT \
+   MAKE_SIG_ENTRY_SIGIOT
+
 #endif
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 8d29bfaa6b..03b4d5e6ee 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -53,40 +53,9 @@ abi_ulong default_rt_sigreturn;
 QEMU_BUILD_BUG_ON(__SIGRTMAX + 1 != _NSIG);
 #endif
 static uint8_t host_to_target_signal_table[_NSIG] = {
-[SIGHUP] = TARGET_SIGHUP,
-[SIGINT] = TARGET_SIGINT,
-[SIGQUIT] = TARGET_SIGQUIT,
-[SIGILL] = TARGET_SIGILL,
-[SIGTRAP] = TARGET_SIGTRAP,
-[SIGABRT] = TARGET_SIGABRT,
-/*[SIGIOT] = TARGET_SIGIOT,*/
-[SIGBUS] = TARGET_SIGBUS,
-[SIGFPE] = TARGET_SIGFPE,
-[SIGKILL] = TARGET_SIGKILL,
-[SIGUSR1] = TARGET_SIGUSR1,
-[SIGSEGV] = TARGET_SIGSEGV,
-[SIGUSR2] = TARGET_SIGUSR2,
-[SIGPIPE] = TARGET_SIGPIPE,
-[SIGALRM] = TARGET_SIGALRM,
-[SIGTERM] = TARGET_SIGTERM,
-#ifdef SIGSTKFLT
-[SIGSTKFLT] = TARGET_SIGSTKFLT,
-#endif
-[SIGCHLD] = TARGET_SIGCHLD,
-[SIGCONT] = TARGET_SIGCONT,
-[SIGSTOP] = TARGET_SIGSTOP,
-[SIGTSTP] = TARGET_SIGTSTP,
-[SIGTTIN] = TARGET_SIGTTIN,
-[SIGTTOU] = TARGET_SIGTTOU,
-[SIGURG] = TARGET_SIGURG,
-[SIGXCPU] = TARGET_SIGXCPU,
-[SIGXFSZ] = TARGET_SIGXFSZ,
-[SIGVTALRM] = TARGET_SIGVTALRM,
-[SIGPROF] = TARGET_SIGPROF,
-[SIGWINCH] = TARGET_SIGWINCH,
-[SIGIO] = TARGET_SIGIO,
-[SIGPWR] = TARGET_SIGPWR,
-[SIGSYS] = TARGET_SIGSYS,
+#define MAKE_SIG_ENTRY(sig) [sig] = TARGET_##sig,
+   MAKE_SIGNAL_LIST
+#undef MAKE_SIG_ENTRY
 /* next signals stay the same */
 };

diff --git a/linux-user/strace.c b/linux-user/strace.c
index 7d882526da..a217c1025a 100644
--- a/linux-user/strace.c
+++ b/linux-user/strace.c
@@ -17,6 +17,7 @@
 #include "qemu.h"
 #include "user-internals.h"
 #include "strace.h"
+#include "signal-common.h"

 struct syscallname {
 int nr;
@@ -141,30 +142,20 @@ if( cmd == val ) { \
 qemu_log("%d", cmd);
 }

+static const char *target_signal_to_host_signal_table[_NSIG] = {
+#define MAKE_SIG_ENTRY(sig) [TARGET_##sig] = #sig,
+MAKE_SIGNAL_LIST
+#undef MAKE_SIG_ENTRY
+};
+
 static void
 print_signal(abi_ulong arg, int last)
 {
 const char *signal_name = NULL;
-switch(arg) {
-case TARGET_SIGHUP: signal_name = "SIGHUP"; break;
-case TARGET_SIGINT: signal_name = "SIGINT"; break;
-case TARGET_SIGQUIT: signal_name = "SIGQUIT"; break;
-case TARGET_SIGILL: signal_name = "SIGILL"; break;
-case TARGET_SIGABRT: signal_name = "SIGABRT"; break;
-case TARGET_SIGFPE: signal_name = "SIGFPE"; break;
-case TARGET_SIGKILL: signal_name = "SIGKILL"; break;
-case 

[PATCH 2/2] linux-user: Add missing clock_gettime64() syscall strace

2022-07-27 Thread Helge Deller
Allow linux-user to strace the clock_gettime64() syscall.
This syscall is used a lot on 32-bit guest architectures which use newer
glibc versions.

Signed-off-by: Helge Deller 
---
 linux-user/strace.c| 53 ++
 linux-user/strace.list |  4 
 2 files changed, 57 insertions(+)

diff --git a/linux-user/strace.c b/linux-user/strace.c
index a217c1025a..27309f1106 100644
--- a/linux-user/strace.c
+++ b/linux-user/strace.c
@@ -82,6 +82,7 @@ UNUSED static void print_buf(abi_long addr, abi_long len, int 
last);
 UNUSED static void print_raw_param(const char *, abi_long, int);
 UNUSED static void print_timeval(abi_ulong, int);
 UNUSED static void print_timespec(abi_ulong, int);
+UNUSED static void print_timespec64(abi_ulong, int);
 UNUSED static void print_timezone(abi_ulong, int);
 UNUSED static void print_itimerval(abi_ulong, int);
 UNUSED static void print_number(abi_long, int);
@@ -794,6 +795,24 @@ print_syscall_ret_clock_gettime(CPUArchState *cpu_env, 
const struct syscallname
 #define print_syscall_ret_clock_getres print_syscall_ret_clock_gettime
 #endif

+#if defined(TARGET_NR_clock_gettime64)
+static void
+print_syscall_ret_clock_gettime64(CPUArchState *cpu_env, const struct 
syscallname *name,
+abi_long ret, abi_long arg0, abi_long arg1,
+abi_long arg2, abi_long arg3, abi_long arg4,
+abi_long arg5)
+{
+if (!print_syscall_err(ret)) {
+qemu_log(TARGET_ABI_FMT_ld, ret);
+qemu_log(" (");
+print_timespec64(arg1, 1);
+qemu_log(")");
+}
+
+qemu_log("\n");
+}
+#endif
+
 #ifdef TARGET_NR_gettimeofday
 static void
 print_syscall_ret_gettimeofday(CPUArchState *cpu_env, const struct syscallname 
*name,
@@ -1651,6 +1670,27 @@ print_timespec(abi_ulong ts_addr, int last)
 }
 }

+static void
+print_timespec64(abi_ulong ts_addr, int last)
+{
+if (ts_addr) {
+struct target__kernel_timespec *ts;
+
+ts = lock_user(VERIFY_READ, ts_addr, sizeof(*ts), 1);
+if (!ts) {
+print_pointer(ts_addr, last);
+return;
+}
+qemu_log("{tv_sec = %lld"
+ ",tv_nsec = %lld}%s",
+ (long long)tswap64(ts->tv_sec), (long 
long)tswap64(ts->tv_nsec),
+ get_comma(last));
+unlock_user(ts, ts_addr, 0);
+} else {
+qemu_log("NULL%s", get_comma(last));
+}
+}
+
 static void
 print_timezone(abi_ulong tz_addr, int last)
 {
@@ -2266,6 +2306,19 @@ print_clock_gettime(CPUArchState *cpu_env, const struct 
syscallname *name,
 #define print_clock_getres print_clock_gettime
 #endif

+#if defined(TARGET_NR_clock_gettime64)
+static void
+print_clock_gettime64(CPUArchState *cpu_env, const struct syscallname *name,
+abi_long arg0, abi_long arg1, abi_long arg2,
+abi_long arg3, abi_long arg4, abi_long arg5)
+{
+print_syscall_prologue(name);
+print_enums(clockids, arg0, 0);
+print_pointer(arg1, 1);
+print_syscall_epilogue(name);
+}
+#endif
+
 #ifdef TARGET_NR_clock_settime
 static void
 print_clock_settime(CPUArchState *cpu_env, const struct syscallname *name,
diff --git a/linux-user/strace.list b/linux-user/strace.list
index 72e17b1acf..a78cdf3cdf 100644
--- a/linux-user/strace.list
+++ b/linux-user/strace.list
@@ -1676,3 +1676,7 @@
 #ifdef TARGET_NR_copy_file_range
 { TARGET_NR_copy_file_range, "copy_file_range", 
"%s(%d,%p,%d,%p,"TARGET_ABI_FMT_lu",%u)", NULL, NULL },
 #endif
+#ifdef TARGET_NR_clock_gettime64
+{ TARGET_NR_clock_gettime64, "clock_gettime64" , NULL, print_clock_gettime64,
+   print_syscall_ret_clock_gettime64 },
+#endif
--
2.35.3




[PATCH] WHPX: Add support for device backed memory regions

2022-07-27 Thread Aidan Khoury
Due to skipping the mapping of read only device memory, Windows
Hypervisor Platform would fail to emulate such memory accesses when booting
OVMF EDK2 firmware. This patch adds ROM device memory region support
for WHPX since the Windows Hypervisor Platform supports mapping read-only
device memory, which allows successful booting of OVMF EDK2 firmware.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/513
  https://gitlab.com/qemu-project/qemu/-/issues/934
Buglink: https://bugs.launchpad.net/bugs/1821595

Signed-off-by: Aidan Khoury 
---
 target/i386/whpx/whpx-all.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index b22a3314b4..7a61df1135 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -2363,11 +2363,18 @@ static void whpx_process_section(MemoryRegionSection 
*section, int add)
 MemoryRegion *mr = section->mr;
 hwaddr start_pa = section->offset_within_address_space;
 ram_addr_t size = int128_get64(section->size);
+bool is_romd = false;
 unsigned int delta;
 uint64_t host_va;
 
 if (!memory_region_is_ram(mr)) {
-return;
+if (memory_region_is_romd(mr)) {
+is_romd = true;
+warn_report("WHPX: ROMD region 0x%016" PRIx64 "->0x%016" PRIx64,
+start_pa, start_pa + size);
+} else {
+return;
+}
 }
 
 delta = qemu_real_host_page_size() - (start_pa & 
~qemu_real_host_page_mask());
@@ -2386,7 +2393,7 @@ static void whpx_process_section(MemoryRegionSection 
*section, int add)
 + section->offset_within_region + delta;
 
 whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add,
-memory_region_is_rom(mr), mr->name);
+memory_region_is_rom(mr) || is_romd, mr->name);
 }
 
 static void whpx_region_add(MemoryListener *listener,
-- 
2.37.1





Re: [PULL 14/15] qdev: Base object creation on QDict rather than QemuOpts

2022-07-27 Thread Kevin Wolf
Am 07.07.2022 um 22:24 hat Peter Maydell geschrieben:
> On Mon, 4 Jul 2022 at 05:50, Markus Armbruster  wrote:
> > My initial (knee-jerk) reaction to breaking array properties: Faster,
> > Pussycat! Kill! Kill!
> 
> In an ideal world, what would you replace them with?

The next (and final) patch in this pull request added JSON syntax for
-device, so we can actually have proper list properties now that are
accessed with the normal list visitors. I suppose some integration with
the qdev property system is still missing, but on the QOM level it could
be used.

In the ideal world, we would also be able to replace the human CLI
syntax so that JSON isn't required for this, but doing this in reality
would probably cause new compatibility problems - though libvirt has
been using JSON for a while, so I guess it wouldn't mind an incompatible
change of human syntax.

Kevin




Re: [RFC v3 1/8] blkio: add io_uring block driver using libblkio

2022-07-27 Thread Kevin Wolf
Am 08.07.2022 um 06:17 hat Stefan Hajnoczi geschrieben:
> libblkio (https://gitlab.com/libblkio/libblkio/) is a library for
> high-performance disk I/O. It currently supports io_uring and
> virtio-blk-vhost-vdpa with additional drivers under development.
> 
> One of the reasons for developing libblkio is that other applications
> besides QEMU can use it. This will be particularly useful for
> vhost-user-blk which applications may wish to use for connecting to
> qemu-storage-daemon.
> 
> libblkio also gives us an opportunity to develop in Rust behind a C API
> that is easy to consume from QEMU.
> 
> This commit adds io_uring and virtio-blk-vhost-vdpa BlockDrivers to QEMU
> using libblkio. It will be easy to add other libblkio drivers since they
> will share the majority of code.
> 
> For now I/O buffers are copied through bounce buffers if the libblkio
> driver requires it. Later commits add an optimization for
> pre-registering guest RAM to avoid bounce buffers.
> 
> The syntax is:
> 
>   --blockdev 
> io_uring,node-name=drive0,filename=test.img,readonly=on|off,cache.direct=on|off
> 
> and:
> 
>   --blockdev 
> virtio-blk-vhost-vdpa,node-name=drive0,path=/dev/vdpa...,readonly=on|off
> 
> Signed-off-by: Stefan Hajnoczi 

The subject line implies only io_uring, but you actually add vhost-vdpa
support, too. I think the subject line should be changed.

I think it would also make sense to already implement support for
vhost-user-blk on the QEMU side even if support isn't compiled in
libblkio by default and opening vhost-user-blk images would therefore
always fail with a default build.

But then you could run QEMU with a custom build of libblkio to make use
of it without patching QEMU. This is probably useful for getting libvirt
support for using a storage daemon implemented without having to wait
for another QEMU release. (Peter, do you have any opinion on this?)

Kevin




[PULL 2/2] iotests/131: Add parallels regression test

2022-07-27 Thread Vladimir Sementsov-Ogievskiy
From: Hanna Reitz 

Test an allocating write to a parallels image that has a backing node.
Before HEAD^, doing so used to give me a failed assertion (when the
backing node contains only `42` bytes; the results varies with the value
chosen, for `0` bytes, for example, all I get is EIO).

Signed-off-by: Hanna Reitz 
Reviewed-by: Denis V. Lunev 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20220714132801.72464-3-hre...@redhat.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/131 | 35 ++-
 tests/qemu-iotests/131.out | 13 +
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/131 b/tests/qemu-iotests/131
index d7456cae5b..a847692b4c 100755
--- a/tests/qemu-iotests/131
+++ b/tests/qemu-iotests/131
@@ -43,7 +43,7 @@ _supported_os Linux
 
 inuse_offset=$((0x2c))
 
-size=64M
+size=$((64 * 1024 * 1024))
 CLUSTER_SIZE=64k
 IMGFMT=parallels
 _make_test_img $size
@@ -70,6 +70,39 @@ _check_test_img
 _check_test_img -r all
 { $QEMU_IO -c "read -P 0x11 64k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | 
_filter_testdir
 
+echo "== allocate with backing =="
+# Verify that allocating clusters works fine even when there is a backing 
image.
+# Regression test for a bug where we would pass a buffer read from the backing
+# node as a QEMUIOVector object, which could cause anything from I/O errors 
over
+# assertion failures to invalid reads from memory.
+
+# Clear image
+_make_test_img $size
+# Create base image
+TEST_IMG="$TEST_IMG.base" _make_test_img $size
+
+# Write some data to the base image (which would trigger an assertion failure 
if
+# interpreted as a QEMUIOVector)
+$QEMU_IO -c 'write -P 42 0 64k' "$TEST_IMG.base" | _filter_qemu_io
+
+# Parallels does not seem to support storing a backing filename in the image
+# itself, so we need to build our backing chain on the command line
+imgopts="driver=$IMGFMT,file.driver=$IMGPROTO,file.filename=$TEST_IMG"
+imgopts+=",backing.driver=$IMGFMT"
+imgopts+=",backing.file.driver=$IMGPROTO,backing.file.filename=$TEST_IMG.base"
+
+# Cause allocation in the top image
+QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT \
+$QEMU_IO --image-opts "$imgopts" -c 'write -P 1 0 64' | _filter_qemu_io
+
+# Verify
+QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT \
+$QEMU_IO --image-opts "$imgopts" \
+-c 'read -P 1 0 64' \
+-c "read -P 42 64 $((64 * 1024 - 64))" \
+-c "read -P 0 64k $((size - 64 * 1024))" \
+| _filter_qemu_io
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/131.out b/tests/qemu-iotests/131.out
index 70da03dee5..de5ef7a8f5 100644
--- a/tests/qemu-iotests/131.out
+++ b/tests/qemu-iotests/131.out
@@ -37,4 +37,17 @@ Double checking the fixed image now...
 No errors were found on the image.
 read 65536/65536 bytes at offset 65536
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== allocate with backing ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 64/64 bytes at offset 0
+64 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 64/64 bytes at offset 0
+64 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65472/65472 bytes at offset 64
+63.938 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 67043328/67043328 bytes at offset 65536
+63.938 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 *** done
-- 
2.25.1




[PULL 1/2] block/parallels: Fix buffer-based write call

2022-07-27 Thread Vladimir Sementsov-Ogievskiy
From: Hanna Reitz 

Commit a4072543ccdddbd241d5962d9237b8b41fd006bf has changed the I/O here
from working on a local one-element I/O vector to just using the buffer
directly (using the bdrv_co_pread()/bdrv_co_pwrite() helper functions
introduced shortly before).

However, it only changed the bdrv_co_preadv() call to bdrv_co_pread() -
the subsequent bdrv_co_pwritev() call stayed this way, and so still
expects a QEMUIOVector pointer instead of a plain buffer.  We must
change that to be a bdrv_co_pwrite() call.

Fixes: a4072543ccdddbd241d5962d ("block/parallels: use buffer-based io")
Signed-off-by: Hanna Reitz 
Reviewed-by: Denis V. Lunev 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20220714132801.72464-2-hre...@redhat.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/parallels.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index 8b235b9505..a229c06f25 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -241,8 +241,8 @@ static int64_t allocate_clusters(BlockDriverState *bs, 
int64_t sector_num,
 return ret;
 }
 
-ret = bdrv_co_pwritev(bs->file, s->data_end * BDRV_SECTOR_SIZE,
-  nb_cow_bytes, buf, 0);
+ret = bdrv_co_pwrite(bs->file, s->data_end * BDRV_SECTOR_SIZE,
+ nb_cow_bytes, buf, 0);
 qemu_vfree(buf);
 if (ret < 0) {
 return ret;
-- 
2.25.1




[PULL 0/2] block: fix parallels block driver

2022-07-27 Thread Vladimir Sementsov-Ogievskiy
The following changes since commit f6cce6bcb2ef959cdd4da0e368f7c72045f21d6d:

  Merge tag 'pull-target-arm-20220726' of 
https://git.linaro.org/people/pmaydell/qemu-arm into staging (2022-07-26 
08:32:01 -0700)

are available in the Git repository at:

  https://gitlab.com/vsementsov/qemu.git tags/pull-block-2022-07-27

for you to fetch changes up to 0c2cb3827e46dc30cd41eeb38f8e318eb665e6a4:

  iotests/131: Add parallels regression test (2022-07-26 22:05:20 +0300)


Block: fix parallels block driver



Hanna Reitz (2):
  block/parallels: Fix buffer-based write call
  iotests/131: Add parallels regression test

 block/parallels.c  |  4 ++--
 tests/qemu-iotests/131 | 35 ++-
 tests/qemu-iotests/131.out | 13 +
 3 files changed, 49 insertions(+), 3 deletions(-)

-- 
2.25.1




Re: [PATCH v5 5/8] blockdev: Add a new IF type IF_OTHER

2022-07-27 Thread Kevin Wolf
Am 18.07.2022 um 11:49 hat Markus Armbruster geschrieben:
> Hao Wu  writes:
> 
> > This type is used to represent block devs that are not suitable to
> > be represented by other existing types.
> >
> > A sample use is to represent an at24c eeprom device defined in
> > hw/nvram/eeprom_at24c.c. The block device can be used to contain the
> > content of the said eeprom device.
> >
> > Signed-off-by: Hao Wu 
> 
> Let me add a bit more history in the hope of helping other reviewers.
> 
> v3 of this series[1] used IF_NONE for the EEPROM device.
> 
> Peter Maydell questioned that[2], and we concluded it's wrong.  I wrote
> 
> [A] board can use any traditional interface type.  If none of them
> matches, and common decency prevents use of a non-matching one,
> invent a new one.  We could do IF_OTHER.
> 
> Thomas Huth cleaned up the existing abuse of IF_NONE to use IF_PFLASH
> instead, in commit 6b717a8d44:
> 
> hw/misc/sifive_u_otp: Use IF_PFLASH for the OTP device instead of IF_NONE
> 
> Configuring a drive with "if=none" is meant for creation of a backend
> only, it should not get automatically assigned to a device frontend.
> Use "if=pflash" for the One-Time-Programmable device instead (like
> it is e.g. also done for the efuse device in hw/arm/xlnx-zcu102.c).
> 
> Since the old way of configuring the device has already been published
> with the previous QEMU versions, we cannot remove this immediately, but
> have to deprecate it and support it for at least two more releases.
> 
> Signed-off-by: Thomas Huth 
> Acked-by: Philippe Mathieu-Daudé 
> Reviewed-by: Markus Armbruster 
> Reviewed-by: Alistair Francis 
> Message-id: 2029102549.217755-1-th...@redhat.com
> Signed-off-by: Alistair Francis 
> 
> An OTP device isn't really a parallel flash, and neither are eFuses.
> More fast-and-lose use of IF_PFLASH may exist in the tree, and maybe of
> other interface types, too.
> 
> This patch introduces IF_OTHER.  The patch after next uses it for an
> EEPROM device.
> 
> Do we want IF_OTHER?

What would the semantics even be? Any block device that doesn't pick up
a different category may pick up IF_OTHER backends?

It certainly feels like a strange interface to ask for "other" disk and
then getting as surprise what this other thing might be. It's
essentially the same as having an explicit '-device other', and I
suppose most people would find that strange.

> If no, I guess we get to abuse IF_PFLASH some more.
> 
> If yes, I guess we should use IF_PFLASH only for actual parallel flash
> memory going forward.  Cleaning up existing abuse of IF_PFLASH may not
> be worth the trouble, though.
> 
> Thoughts?

If the existing types aren't good enough (I don't have an opinion on
whether IF_PFLASH is a good match), let's add a new one. But a specific
new one, not just "other".

Kevin




Re: [PATCH] hw/intc: sifive_plic: Fix multi-socket plic configuraiton

2022-07-27 Thread Atish Kumar Patra
On Wed, Jul 27, 2022 at 5:23 AM Alistair Francis 
wrote:

> On Sat, Jul 23, 2022 at 7:22 PM Atish Patra  wrote:
> >
> > Since commit 40244040a7ac, multi-socket configuration with plic is
> > broken as the hartid for second socket is calculated incorrectly.
> > The hartid stored in addr_config already includes the offset
> > for the base hartid for that socket. Adding it again would lead
> > to segfault while creating the plic device for the virt machine.
> > qdev_connect_gpio_out was also invoked with incorrect number of gpio
> > lines.
> >
> > Fixes: 40244040a7ac (hw/intc: sifive_plic: Avoid overflowing the
> addr_config buffer)
> >
> > Signed-off-by: Atish Patra 
> > ---
> >  hw/intc/sifive_plic.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/hw/intc/sifive_plic.c b/hw/intc/sifive_plic.c
> > index 56d60e9ac935..fdac028a521f 100644
> > --- a/hw/intc/sifive_plic.c
> > +++ b/hw/intc/sifive_plic.c
> > @@ -454,10 +454,10 @@ DeviceState *sifive_plic_create(hwaddr addr, char
> *hart_config,
> >
> >  for (i = 0; i < plic->num_addrs; i++) {
> >  int cpu_num = plic->addr_config[i].hartid;
> > -CPUState *cpu = qemu_get_cpu(hartid_base + cpu_num);
> > +CPUState *cpu = qemu_get_cpu(cpu_num);
> >
> >  if (plic->addr_config[i].mode == PLICMode_M) {
> > -qdev_connect_gpio_out(dev, num_harts + cpu_num,
> > +qdev_connect_gpio_out(dev, cpu_num,
>
> Argh!
>
> I was trying to get this ready to go into 7.1. I have been working on
> updating my tests to catch this failure in the future as well.
>
> While testing this change I noticed that it breaks the noMMU test case.
>
> I think the correct fix is actually this (on top of your patch):
>
> diff --git a/hw/intc/sifive_plic.c b/hw/intc/sifive_plic.c
> index fdac028a52..af4ae3630e 100644
> --- a/hw/intc/sifive_plic.c
> +++ b/hw/intc/sifive_plic.c
> @@ -457,7 +457,7 @@ DeviceState *sifive_plic_create(hwaddr addr, char
> *hart_config,
> CPUState *cpu = qemu_get_cpu(cpu_num);
>
> if (plic->addr_config[i].mode == PLICMode_M) {
> -qdev_connect_gpio_out(dev, cpu_num,
> +qdev_connect_gpio_out(dev, num_harts - plic->hartid_base +
> cpu_num,
>   qdev_get_gpio_in(DEVICE(cpu),
> IRQ_M_EXT));
> }
> if (plic->addr_config[i].mode == PLICMode_S) {
>
> The idea is that we need to increment the second argument to
> qdev_connect_gpio_out() for the PLICMode_M compared to the PLICMode_S
> case.
>
> This ensures that we do that correctly without breaking anything.
>
> How does that look to you?
>
>
Ahh yes. That makes sense.
Tested the updated change on multi-socket as well.


> Alistair
>
> >qdev_get_gpio_in(DEVICE(cpu),
> IRQ_M_EXT));
> >  }
> >  if (plic->addr_config[i].mode == PLICMode_S) {
> > --
> > 2.25.1
> >
> >
>


Re: [PATCH v4 04/17] dump: Rework get_start_block

2022-07-27 Thread Janis Schoetterl-Glausch
On 7/26/22 11:22, Janosch Frank wrote:
> get_start_block() returns the start address of the first memory block
> or -1.
> 
> With the GuestPhysBlock iterator conversion we don't need to set the
> start address and can therefore remove that code. The only
> functionality left is the validation of the start block so it only
> makes sense to re-name the function to validate_start_block()
> 
> Signed-off-by: Janosch Frank 

Reviewed-by: Janis Schoetterl-Glausch 

See suggenstions below.

> ---
>  dump/dump.c | 20 ++--
>  1 file changed, 6 insertions(+), 14 deletions(-)
> 
> diff --git a/dump/dump.c b/dump/dump.c
> index 35b9833a00..b59faf9941 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -1498,30 +1498,22 @@ static void create_kdump_vmcore(DumpState *s, Error 
> **errp)
>  }
>  }
>  
> -static ram_addr_t get_start_block(DumpState *s)
> +static int validate_start_block(DumpState *s)
>  {
>  GuestPhysBlock *block;
>  
>  if (!s->has_filter) {
> -s->next_block = QTAILQ_FIRST(>guest_phys_blocks.head);
>  return 0;
>  }
>  
>  QTAILQ_FOREACH(block, >guest_phys_blocks.head, next) {
> +/* This block is out of the range */
>  if (block->target_start >= s->begin + s->length ||
>  block->target_end <= s->begin) {
> -/* This block is out of the range */
>  continue;
>  }
> -
> -s->next_block = block;
> -if (s->begin > block->target_start) {
> -s->start = s->begin - block->target_start;
> -} else {
> -s->start = 0;
> -}
> -return s->start;
> -}
> +return 0;
> +   }
>  
>  return -1;
>  }

If you change the dump_get_memblock_* functions to take the DumpState, you 
could do

bool has_unfiltered_mem(DumpState *s)
{
GuestPhysBlock *block;

QTAILQ_FOREACH(block, >guest_phys_blocks.head, next) {
if (dump_get_memblock_size(block, s) > 0) {
return true;
}
}
return false;
}

or you could do the same with the existing dump_get_memblock_size, add

if (has_filter && !length) {
error_setg(errp, QERR_INVALID_PARAMETER, "length");
goto cleanup;
}

to dump_init, encode has_filter in length as you're currently doing and get rid 
of s->has_filter entirely.

> @@ -1668,8 +1660,8 @@ static void dump_init(DumpState *s, int fd, bool 
> has_format,
>  goto cleanup;
>  }
>  
> -s->start = get_start_block(s);
> -if (s->start == -1) {
> +/* Is the filter filtering everything? */
> +if (validate_start_block(s) == -1) {
>  error_setg(errp, QERR_INVALID_PARAMETER, "begin");
>  goto cleanup;
>  }




Re: [PATCH v2 07/11] acpi/tests/bits: add python test that exercizes QEMU bios tables using biosbits

2022-07-27 Thread Ani Sinha


On Mon, 25 Jul 2022, Ani Sinha wrote:

>
>
> On Sat, 16 Jul 2022, Michael S. Tsirkin wrote:
>
> > On Sat, Jul 16, 2022 at 12:06:00PM +0530, Ani Sinha wrote:
> > >
> > >
> > > On Fri, Jul 15, 2022 at 11:20 Michael S. Tsirkin  wrote:
> > >
> > > On Fri, Jul 15, 2022 at 09:47:27AM +0530, Ani Sinha wrote:
> > > > > Instead of all this mess, can't we just spawn e.g. "git clone 
> > > --depth
> > > 1"?
> > > > > And if the directory exists I would fetch and checkout.
> > > >
> > > > There are two reasons I can think of why I do not like this idea:
> > > >
> > > > (a) a git clone of a whole directory would download all versions of 
> > > the
> > > > binary whereas we want only a specific version.
> > >
> > > You mention shallow clone yourself, and I used --depth 1 above.
> > >
> > > > Downloading a single file
> > > > by shallow cloning or creating a git archive is overkill IMHO when 
> > > a wget
> > > > style retrieval works just fine.
> > >
> > > However, it does not provide for versioning, tagging etc so you have
> > > to implement your own schema.
> > >
> > >
> > > Hmm I’m not sure if we need all that. Bits has its own versioning 
> > > mechanism and
> > > I think all we need to do is maintain the same versioning logic and 
> > > maintain
> > > binaries of different  versions. Do we really need the power of 
> > > git/version
> > > control here? Dunno.
> >
> > Well we need some schema. Given we are not using official bits releases
> > I don't think we can reuse theirs.
>
> OK fine. Lets figuire out how to push bits somewhere in git.qemu.org and
> the binaries in some other repo first. Everything else hinges on that. We
> can fix the rest of the bits later incrementally.

DanPB, any thoughts on putting bits on git.qemu.org or where and how to
keep the binaries?

Re: [PATCH v4 03/17] dump: Convert GuestPhysBlock iterators and use the filter functions

2022-07-27 Thread Janis Schoetterl-Glausch
On 7/26/22 11:22, Janosch Frank wrote:
> The iteration over the memblocks is hard to understand so it's about
> time to clean it up. Instead of manually grabbing the next memblock we
> can use QTAILQ_FOREACH to iterate over all memblocks.
> 
> Additionally we move the calculation of the offset and length out by
> using the dump_get_memblock_*() functions.
> 
> Signed-off-by: Janosch Frank 

Reviewed-by: Janis Schoetterl-Glausch 

> ---
>  dump/dump.c | 51 +++
>  1 file changed, 11 insertions(+), 40 deletions(-)
> 
> diff --git a/dump/dump.c b/dump/dump.c
> index 0fd7c76c1e..35b9833a00 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -628,56 +628,27 @@ int64_t dump_get_memblock_start(GuestPhysBlock *block, 
> int64_t filter_area_start
>  return 0;
>  }
>  
> -static int get_next_block(DumpState *s, GuestPhysBlock *block)
> -{
> -while (1) {
> -block = QTAILQ_NEXT(block, next);
> -if (!block) {
> -/* no more block */
> -return 1;
> -}
> -
> -s->start = 0;
> -s->next_block = block;
> -if (s->has_filter) {
> -if (block->target_start >= s->begin + s->length ||
> -block->target_end <= s->begin) {
> -/* This block is out of the range */
> -continue;
> -}
> -
> -if (s->begin > block->target_start) {
> -s->start = s->begin - block->target_start;
> -}
> -}
> -
> -return 0;
> -}
> -}
> -
>  /* write all memory to vmcore */
>  static void dump_iterate(DumpState *s, Error **errp)
>  {
>  ERRP_GUARD();
>  GuestPhysBlock *block;
> -int64_t size;
> +int64_t memblock_size, memblock_start;
>  
> -do {
> -block = s->next_block;
> -
> -size = block->target_end - block->target_start;
> -if (s->has_filter) {
> -size -= s->start;
> -if (s->begin + s->length < block->target_end) {
> -size -= block->target_end - (s->begin + s->length);
> -}
> +QTAILQ_FOREACH(block, >guest_phys_blocks.head, next) {
> +memblock_start = dump_get_memblock_start(block, s->begin, s->length);
> +if (memblock_start == -1) {
> +continue;
>  }
> -write_memory(s, block, s->start, size, errp);
> +
> +memblock_size = dump_get_memblock_size(block, s->begin, s->length);
> +
> +/* Write the memory to file */
> +write_memory(s, block, memblock_start, memblock_size, errp);
>  if (*errp) {
>  return;
>  }
> -
> -} while (!get_next_block(s, block));
> +}
>  }
>  
>  static void create_vmcore(DumpState *s, Error **errp)




Re: [PATCH v4 01/17] dump: Rename write_elf_loads to write_elf_phdr_loads

2022-07-27 Thread Janis Schoetterl-Glausch
On 7/26/22 11:22, Janosch Frank wrote:
> Let's make it a bit clearer that we write the program headers of the
> PT_LOAD type.
> 
> Signed-off-by: Janosch Frank 
> Reviewed-by: Marc-André Lureau 

Reviewed-by: Janis Schoetterl-Glausch 

> ---
>  dump/dump.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/dump/dump.c b/dump/dump.c
> index 4d9658ffa2..0ed7cf9c7b 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -490,7 +490,7 @@ static void get_offset_range(hwaddr phys_addr,
>  }
>  }
>  
> -static void write_elf_loads(DumpState *s, Error **errp)
> +static void write_elf_phdr_loads(DumpState *s, Error **errp)
>  {
>  ERRP_GUARD();
>  hwaddr offset, filesz;
> @@ -573,8 +573,8 @@ static void dump_begin(DumpState *s, Error **errp)
>  return;
>  }
>  
> -/* write all PT_LOAD to vmcore */
> -write_elf_loads(s, errp);
> +/* write all PT_LOADs to vmcore */
> +write_elf_phdr_loads(s, errp);
>  if (*errp) {
>  return;
>  }




Re: [PATCH v3 05/12] ppc/pnv: turn PnvPHB4 into a PnvPHB backend

2022-07-27 Thread Frederic Barrat




On 24/06/2022 10:49, Daniel Henrique Barboza wrote:


diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 1df91971b8..b7273f386e 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -672,7 +672,14 @@ static void pnv_chip_power8_pic_print_info(PnvChip *chip, 
Monitor *mon)
  static int pnv_chip_power9_pic_print_info_child(Object *child, void *opaque)
  {
  Monitor *mon = opaque;
-PnvPHB4 *phb4 = (PnvPHB4 *) object_dynamic_cast(child, TYPE_PNV_PHB4);
+PnvPHB *phb =  (PnvPHB *) object_dynamic_cast(child, TYPE_PNV_PHB);
+PnvPHB4 *phb4;
+
+if (!phb) {
+return 0;
+}
+
+phb4 = (PnvPHB4 *)phb->backend;
  
  if (phb4) {

  pnv_phb4_pic_print_info(phb4, mon);



The full code in pnv_chip_power9_pic_print_info_child() looks like this:

PnvPHB *phb =  (PnvPHB *) object_dynamic_cast(child, TYPE_PNV_PHB);
PnvPHB4 *phb4;

if (!phb) {
return 0;
}

phb4 = (PnvPHB4 *)phb->backend;

if (phb4) {
pnv_phb4_pic_print_info(phb4, mon);
}

Which is correct. However, if I want to nitpick, phb->backend is defined 
when the PnvPHB object is realized, so I don't think we can get here 
with the pointer being null, so we could remove the second if statement 
for readability. The reason I mention it is that we don't take that much 
care in the pnv_chip_power8_pic_print_info() function just above, so it 
looks a bit odd.


In any case:
Reviewed-by: Frederic Barrat 

  Fred





@@ -2122,8 +2129,14 @@ static void pnv_machine_power9_class_init(ObjectClass 
*oc, void *data)
  PnvMachineClass *pmc = PNV_MACHINE_CLASS(oc);
  static const char compat[] = "qemu,powernv9\0ibm,powernv";
  
+static GlobalProperty phb_compat[] = {

+{ TYPE_PNV_PHB, "version", "4" },
+};
+
  mc->desc = "IBM PowerNV (Non-Virtualized) POWER9";
  mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.0");
+compat_props_add(mc->compat_props, phb_compat, G_N_ELEMENTS(phb_compat));
+
  xfc->match_nvt = pnv_match_nvt;
  
  mc->alias = "powernv";

@@ -2140,8 +2153,13 @@ static void pnv_machine_power10_class_init(ObjectClass 
*oc, void *data)
  XiveFabricClass *xfc = XIVE_FABRIC_CLASS(oc);
  static const char compat[] = "qemu,powernv10\0ibm,powernv";
  
+static GlobalProperty phb_compat[] = {

+{ TYPE_PNV_PHB, "version", "5" },
+};
+
  mc->desc = "IBM PowerNV (Non-Virtualized) POWER10";
  mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power10_v2.0");
+compat_props_add(mc->compat_props, phb_compat, G_N_ELEMENTS(phb_compat));
  
  pmc->compat = compat;

  pmc->compat_size = sizeof(compat);
diff --git a/include/hw/pci-host/pnv_phb4.h b/include/hw/pci-host/pnv_phb4.h
index 90843ac3a9..f22253358f 100644
--- a/include/hw/pci-host/pnv_phb4.h
+++ b/include/hw/pci-host/pnv_phb4.h
@@ -18,6 +18,7 @@
  typedef struct PnvPhb4PecState PnvPhb4PecState;
  typedef struct PnvPhb4PecStack PnvPhb4PecStack;
  typedef struct PnvPHB4 PnvPHB4;
+typedef struct PnvPHB PnvPHB;
  typedef struct PnvChip PnvChip;
  
  /*

@@ -78,7 +79,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(PnvPHB4, PNV_PHB4)
  #define PCI_MMIO_TOTAL_SIZE(0x1ull << 60)
  
  struct PnvPHB4 {

-PCIExpressHost parent_obj;
+DeviceState parent;
+
+PnvPHB *phb_base;
  
  uint32_t chip_id;

  uint32_t phb_id;




Re: [PATCH for 7.1 2/2] aspeed/fby35: Fix owner of the BMC RAM memory region

2022-07-27 Thread Peter Delevoryas
On Wed, Jul 27, 2022 at 12:27:14PM +0200, Cédric Le Goater wrote:
> A MachineState object is used as a owner of the RAM region and this
> asserts in memory_region_init_ram() when QEMU is built with
> CONFIG_QOM_CAST_DEBUG :
> 
> /* This will assert if owner is neither NULL nor a DeviceState.
>  * We only want the owner here for the purposes of defining a
>  * unique name for migration. TODO: Ideally we should implement
>  * a naming scheme for Objects which are not DeviceStates, in
>  * which case we can relax this restriction.
>  */
> owner_dev = DEVICE(owner);
> 
> Use the BMC and BIC objects as the owners of their memory regions.
> 
> Cc: Peter Delevoryas 
> Fixes: 778e14cc5cd5 ("aspeed: Add AST2600 (BMC) to fby35")
> Signed-off-by: Cédric Le Goater 
> ---

Looks good to me, thanks for fixing this Cedric!
Peter

Reviewed-by: Peter Delevoryas 

>  hw/arm/fby35.c | 14 +-
>  1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/arm/fby35.c b/hw/arm/fby35.c
> index 79605f306462..90c04bbc3389 100644
> --- a/hw/arm/fby35.c
> +++ b/hw/arm/fby35.c
> @@ -72,11 +72,13 @@ static void fby35_bmc_init(Fby35State *s)
>  {
>  DriveInfo *drive0 = drive_get(IF_MTD, 0, 0);
>  
> -memory_region_init(>bmc_memory, OBJECT(s), "bmc-memory", UINT64_MAX);
> -memory_region_init_ram(>bmc_dram, OBJECT(s), "bmc-dram",
> +object_initialize_child(OBJECT(s), "bmc", >bmc, "ast2600-a3");
> +
> +memory_region_init(>bmc_memory, OBJECT(>bmc), "bmc-memory",
> +   UINT64_MAX);
> +memory_region_init_ram(>bmc_dram, OBJECT(>bmc), "bmc-dram",
> FBY35_BMC_RAM_SIZE, _abort);
>  
> -object_initialize_child(OBJECT(s), "bmc", >bmc, "ast2600-a3");
>  object_property_set_int(OBJECT(>bmc), "ram-size", FBY35_BMC_RAM_SIZE,
>  _abort);
>  object_property_set_link(OBJECT(>bmc), "memory", 
> OBJECT(>bmc_memory),
> @@ -120,9 +122,11 @@ static void fby35_bic_init(Fby35State *s)
>  s->bic_sysclk = clock_new(OBJECT(s), "SYSCLK");
>  clock_set_hz(s->bic_sysclk, 2ULL);
>  
> -memory_region_init(>bic_memory, OBJECT(s), "bic-memory", UINT64_MAX);
> -
>  object_initialize_child(OBJECT(s), "bic", >bic, "ast1030-a1");
> +
> +memory_region_init(>bic_memory, OBJECT(>bic), "bic-memory",
> +   UINT64_MAX);
> +
>  qdev_connect_clock_in(DEVICE(>bic), "sysclk", s->bic_sysclk);
>  object_property_set_link(OBJECT(>bic), "memory", 
> OBJECT(>bic_memory),
>   _abort);
> -- 
> 2.37.1
> 



Re: [PATCH v2 6/9] aspeed: Add AST2600 (BMC) to fby35

2022-07-27 Thread Peter Delevoryas
On Wed, Jul 27, 2022 at 12:05:58PM +0200, Cédric Le Goater wrote:
> On 7/5/22 21:13, Peter Delevoryas wrote:
> > You can test booting the BMC with both '-device loader' and '-drive
> > file'. This is necessary because of how the fb-openbmc boot sequence
> > works (jump to 0x2000 after U-Boot SPL).
> > 
> >  wget 
> > https://github.com/facebook/openbmc/releases/download/openbmc-e2294ff5d31d/fby35.mtd
> >  qemu-system-arm -machine fby35 -nographic \
> >  -device loader,file=fby35.mtd,addr=0,cpu-num=0 -drive 
> > file=fby35.mtd,format=raw,if=mtd
> > 
> > Signed-off-by: Peter Delevoryas 
> > ---
> >   hw/arm/fby35.c | 41 +
> >   1 file changed, 41 insertions(+)
> > 
> > diff --git a/hw/arm/fby35.c b/hw/arm/fby35.c
> > index 03b458584c..5c5224d374 100644
> > --- a/hw/arm/fby35.c
> > +++ b/hw/arm/fby35.c
> > @@ -6,17 +6,55 @@
> >*/
> >   #include "qemu/osdep.h"
> > +#include "qemu/units.h"
> > +#include "qapi/error.h"
> > +#include "sysemu/sysemu.h"
> >   #include "hw/boards.h"
> > +#include "hw/arm/aspeed_soc.h"
> >   #define TYPE_FBY35 MACHINE_TYPE_NAME("fby35")
> >   OBJECT_DECLARE_SIMPLE_TYPE(Fby35State, FBY35);
> >   struct Fby35State {
> >   MachineState parent_obj;
> > +
> > +MemoryRegion bmc_memory;
> > +MemoryRegion bmc_dram;
> > +MemoryRegion bmc_boot_rom;
> > +
> > +AspeedSoCState bmc;
> >   };
> > +#define FBY35_BMC_RAM_SIZE (2 * GiB)
> > +
> > +static void fby35_bmc_init(Fby35State *s)
> > +{
> > +memory_region_init(>bmc_memory, OBJECT(s), "bmc-memory", 
> > UINT64_MAX);
> > +memory_region_init_ram(>bmc_dram, OBJECT(s), "bmc-dram",
> > +   FBY35_BMC_RAM_SIZE, _abort);
> 
> A MachineState object is used as a owner of the RAM region and this
> should assert in memory_region_init_ram() :
> 
> /* This will assert if owner is neither NULL nor a DeviceState.
>  * We only want the owner here for the purposes of defining a
>  * unique name for migration. TODO: Ideally we should implement
>  * a naming scheme for Objects which are not DeviceStates, in
>  * which case we can relax this restriction.
>  */
> owner_dev = DEVICE(owner);
> 
> It went unnoticed until I started experimenting with some MachineState
> modifications. CONFIG_QOM_CAST_DEBUG needs to be defined to catch the
> error. I would have thought that CI was doing this check. It seems not,
> which is surprising.

Hmmm! I see, didn't realize this was a requirement. Thanks for catching it!

> 
> Anyhow, this needs a fix for 7.1 and I will work on it.

I see, yes, thanks!!

> 
> C.
> 
> > +
> > +object_initialize_child(OBJECT(s), "bmc", >bmc, "ast2600-a3");
> > +object_property_set_int(OBJECT(>bmc), "ram-size", 
> > FBY35_BMC_RAM_SIZE,
> > +_abort);
> > +object_property_set_link(OBJECT(>bmc), "memory", 
> > OBJECT(>bmc_memory),
> > + _abort);
> > +object_property_set_link(OBJECT(>bmc), "dram", OBJECT(>bmc_dram),
> > + _abort);
> > +object_property_set_int(OBJECT(>bmc), "hw-strap1", 0x00C0,
> > +_abort);
> > +object_property_set_int(OBJECT(>bmc), "hw-strap2", 0x0003,
> > +_abort);
> > +aspeed_soc_uart_set_chr(>bmc, ASPEED_DEV_UART5, serial_hd(0));
> > +qdev_realize(DEVICE(>bmc), NULL, _abort);
> > +
> > +aspeed_board_init_flashes(>bmc.fmc, "n25q00", 2, 0);
> > +}
> > +
> >   static void fby35_init(MachineState *machine)
> >   {
> > +Fby35State *s = FBY35(machine);
> > +
> > +fby35_bmc_init(s);
> >   }
> >   static void fby35_class_init(ObjectClass *oc, void *data)
> > @@ -25,6 +63,9 @@ static void fby35_class_init(ObjectClass *oc, void *data)
> >   mc->desc = "Meta Platforms fby35";
> >   mc->init = fby35_init;
> > +mc->no_floppy = 1;
> > +mc->no_cdrom = 1;
> > +mc->min_cpus = mc->max_cpus = mc->default_cpus = 2;
> >   }
> >   static const TypeInfo fby35_types[] = {
> 



Re: [PATCH v3 10/12] ppc/pnv: remove pecc->rp_model

2022-07-27 Thread Frederic Barrat




On 24/06/2022 10:49, Daniel Henrique Barboza wrote:

The attribute is unused.

Signed-off-by: Daniel Henrique Barboza 
---



Reviewed-by: Frederic Barrat 

  Fred



  hw/pci-host/pnv_phb4_pec.c | 2 --
  include/hw/pci-host/pnv_phb4.h | 1 -
  2 files changed, 3 deletions(-)

diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c
index 0ef66b9a9b..8dc363d69c 100644
--- a/hw/pci-host/pnv_phb4_pec.c
+++ b/hw/pci-host/pnv_phb4_pec.c
@@ -260,7 +260,6 @@ static void pnv_pec_class_init(ObjectClass *klass, void 
*data)
  pecc->version = PNV_PHB4_VERSION;
  pecc->phb_type = TYPE_PNV_PHB4;
  pecc->num_phbs = pnv_pec_num_phbs;
-pecc->rp_model = TYPE_PNV_PHB_ROOT_PORT;
  }
  
  static const TypeInfo pnv_pec_type_info = {

@@ -313,7 +312,6 @@ static void pnv_phb5_pec_class_init(ObjectClass *klass, 
void *data)
  pecc->version = PNV_PHB5_VERSION;
  pecc->phb_type = TYPE_PNV_PHB5;
  pecc->num_phbs = pnv_phb5_pec_num_stacks;
-pecc->rp_model = TYPE_PNV_PHB_ROOT_PORT;
  }
  
  static const TypeInfo pnv_phb5_pec_type_info = {

diff --git a/include/hw/pci-host/pnv_phb4.h b/include/hw/pci-host/pnv_phb4.h
index 29c49ac79c..61a0cb9989 100644
--- a/include/hw/pci-host/pnv_phb4.h
+++ b/include/hw/pci-host/pnv_phb4.h
@@ -200,7 +200,6 @@ struct PnvPhb4PecClass {
  uint64_t version;
  const char *phb_type;
  const uint32_t *num_phbs;
-const char *rp_model;
  };
  
  /*




Re: [PATCH v3 09/12] ppc/pnv: remove root port name from pnv_phb_attach_root_port()

2022-07-27 Thread Frederic Barrat




On 24/06/2022 10:49, Daniel Henrique Barboza wrote:

We support only a single root port, PNV_PHB_ROOT_PORT.

Signed-off-by: Daniel Henrique Barboza 
---



Reviewed-by: Frederic Barrat 

  Fred



  hw/pci-host/pnv_phb.c | 7 +--
  hw/ppc/pnv.c  | 9 +
  include/hw/ppc/pnv.h  | 3 +--
  3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/hw/pci-host/pnv_phb.c b/hw/pci-host/pnv_phb.c
index da729e89e7..cc15a949c9 100644
--- a/hw/pci-host/pnv_phb.c
+++ b/hw/pci-host/pnv_phb.c
@@ -24,7 +24,6 @@ static void pnv_phb_realize(DeviceState *dev, Error **errp)
  PnvPHB *phb = PNV_PHB(dev);
  PCIHostState *pci = PCI_HOST_BRIDGE(dev);
  g_autofree char *phb_typename = NULL;
-g_autofree char *phb_rootport_typename = NULL;
  
  if (!phb->version) {

  error_setg(errp, "version not specified");
@@ -34,15 +33,12 @@ static void pnv_phb_realize(DeviceState *dev, Error **errp)
  switch (phb->version) {
  case 3:
  phb_typename = g_strdup(TYPE_PNV_PHB3);
-phb_rootport_typename = g_strdup(TYPE_PNV_PHB_ROOT_PORT);
  break;
  case 4:
  phb_typename = g_strdup(TYPE_PNV_PHB4);
-phb_rootport_typename = g_strdup(TYPE_PNV_PHB_ROOT_PORT);
  break;
  case 5:
  phb_typename = g_strdup(TYPE_PNV_PHB5);
-phb_rootport_typename = g_strdup(TYPE_PNV_PHB_ROOT_PORT);
  break;
  default:
  g_assert_not_reached();
@@ -73,8 +69,7 @@ static void pnv_phb_realize(DeviceState *dev, Error **errp)
  pnv_phb4_bus_init(dev, PNV_PHB4(phb->backend));
  }
  
-pnv_phb_attach_root_port(pci, phb_rootport_typename,

- phb->phb_id, phb->chip_id);
+pnv_phb_attach_root_port(pci, phb->phb_id, phb->chip_id);
  }
  
  static const char *pnv_phb_root_bus_path(PCIHostState *host_bridge,

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 159899103e..5b7cbfc699 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1199,11 +1199,12 @@ static void pnv_chip_icp_realize(Pnv8Chip *chip8, Error 
**errp)
   * QOM id. 'chip_id' is going to be used as PCIE chassis for the
   * root port.
   */
-void pnv_phb_attach_root_port(PCIHostState *pci, const char *name,
-  int index, int chip_id)
+void pnv_phb_attach_root_port(PCIHostState *pci, int index, int chip_id)
  {
-PCIDevice *root = pci_new(PCI_DEVFN(0, 0), name);
-g_autofree char *default_id = g_strdup_printf("%s[%d]", name, index);
+PCIDevice *root = pci_new(PCI_DEVFN(0, 0), TYPE_PNV_PHB_ROOT_PORT);
+g_autofree char *default_id = g_strdup_printf("%s[%d]",
+  TYPE_PNV_PHB_ROOT_PORT,
+  index);
  const char *dev_id = DEVICE(root)->id;
  
  object_property_add_child(OBJECT(pci->bus), dev_id ? dev_id : default_id,

diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index 33b7b52f45..fbad11d6a7 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -190,8 +190,7 @@ DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER10,
   TYPE_PNV_CHIP_POWER10)
  
  PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip, uint32_t pir);

-void pnv_phb_attach_root_port(PCIHostState *pci, const char *name,
-  int index, int chip_id);
+void pnv_phb_attach_root_port(PCIHostState *pci, int index, int chip_id);
  
  #define TYPE_PNV_MACHINE   MACHINE_TYPE_NAME("powernv")

  typedef struct PnvMachineClass PnvMachineClass;




Re: [PATCH v3 06/12] ppc/pnv: add pnv-phb-root-port device

2022-07-27 Thread Frederic Barrat




On 24/06/2022 10:49, Daniel Henrique Barboza wrote:

We have two very similar root-port devices, pnv-phb3-root-port and
pnv-phb4-root-port. Both consist of a wrapper around the PCIESlot device
that, until now, has no additional attributes.

The main difference between the PHB3 and PHB4 root ports is that
pnv-phb4-root-port has the pnv_phb4_root_port_reset() callback. All
other differences can be merged in a single device without too much
trouble.

This patch introduces the unified pnv-phb-root-port that, in time, will
be used as the default root port for the pnv-phb device.

Signed-off-by: Daniel Henrique Barboza 
---



Reviewed-by: Frederic Barrat 

  Fred



  hw/pci-host/pnv_phb.c | 115 +++---
  hw/pci-host/pnv_phb.h |  16 ++
  2 files changed, 123 insertions(+), 8 deletions(-)

diff --git a/hw/pci-host/pnv_phb.c b/hw/pci-host/pnv_phb.c
index abcbcca445..5e61f85614 100644
--- a/hw/pci-host/pnv_phb.c
+++ b/hw/pci-host/pnv_phb.c
@@ -112,15 +112,114 @@ static void pnv_phb_class_init(ObjectClass *klass, void 
*data)
  dc->user_creatable = false;
  }
  
-static void pnv_phb_register_type(void)

+static void pnv_phb_root_port_reset(DeviceState *dev)
  {
-static const TypeInfo pnv_phb_type_info = {
-.name  = TYPE_PNV_PHB,
-.parent= TYPE_PCIE_HOST_BRIDGE,
-.instance_size = sizeof(PnvPHB),
-.class_init= pnv_phb_class_init,
-};
+PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
+PnvPHBRootPort *phb_rp = PNV_PHB_ROOT_PORT(dev);
+PCIDevice *d = PCI_DEVICE(dev);
+uint8_t *conf = d->config;
  
+rpc->parent_reset(dev);

+
+if (phb_rp->version == 3) {
+return;
+}
+
+/* PHB4 and later requires these extra reset steps */
+pci_byte_test_and_set_mask(conf + PCI_IO_BASE,
+   PCI_IO_RANGE_MASK & 0xff);
+pci_byte_test_and_clear_mask(conf + PCI_IO_LIMIT,
+ PCI_IO_RANGE_MASK & 0xff);
+pci_set_word(conf + PCI_MEMORY_BASE, 0);
+pci_set_word(conf + PCI_MEMORY_LIMIT, 0xfff0);
+pci_set_word(conf + PCI_PREF_MEMORY_BASE, 0x1);
+pci_set_word(conf + PCI_PREF_MEMORY_LIMIT, 0xfff1);
+pci_set_long(conf + PCI_PREF_BASE_UPPER32, 0x1); /* Hack */
+pci_set_long(conf + PCI_PREF_LIMIT_UPPER32, 0x);
+pci_config_set_interrupt_pin(conf, 0);
+}
+
+static void pnv_phb_root_port_realize(DeviceState *dev, Error **errp)
+{
+PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
+PnvPHBRootPort *phb_rp = PNV_PHB_ROOT_PORT(dev);
+PCIDevice *pci = PCI_DEVICE(dev);
+uint16_t device_id = 0;
+Error *local_err = NULL;
+
+rpc->parent_realize(dev, _err);
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+
+switch (phb_rp->version) {
+case 3:
+device_id = PNV_PHB3_DEVICE_ID;
+break;
+case 4:
+device_id = PNV_PHB4_DEVICE_ID;
+break;
+case 5:
+device_id = PNV_PHB5_DEVICE_ID;
+break;
+default:
+g_assert_not_reached();
+}
+
+pci_config_set_device_id(pci->config, device_id);
+pci_config_set_interrupt_pin(pci->config, 0);
+}
+
+static Property pnv_phb_root_port_properties[] = {
+DEFINE_PROP_UINT32("version", PnvPHBRootPort, version, 0),
+
+DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pnv_phb_root_port_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass);
+
+dc->desc = "IBM PHB PCIE Root Port";
+
+device_class_set_props(dc, pnv_phb_root_port_properties);
+device_class_set_parent_realize(dc, pnv_phb_root_port_realize,
+>parent_realize);
+device_class_set_parent_reset(dc, pnv_phb_root_port_reset,
+  >parent_reset);
+dc->reset = _phb_root_port_reset;
+dc->user_creatable = false;
+
+k->vendor_id = PCI_VENDOR_ID_IBM;
+/* device_id will be written during realize() */
+k->device_id = 0;
+k->revision  = 0;
+
+rpc->exp_offset = 0x48;
+rpc->aer_offset = 0x100;
+}
+
+static const TypeInfo pnv_phb_type_info = {
+.name  = TYPE_PNV_PHB,
+.parent= TYPE_PCIE_HOST_BRIDGE,
+.instance_size = sizeof(PnvPHB),
+.class_init= pnv_phb_class_init,
+};
+
+static const TypeInfo pnv_phb_root_port_info = {
+.name  = TYPE_PNV_PHB_ROOT_PORT,
+.parent= TYPE_PCIE_ROOT_PORT,
+.instance_size = sizeof(PnvPHBRootPort),
+.class_init= pnv_phb_root_port_class_init,
+};
+
+static void pnv_phb_register_types(void)
+{
  type_register_static(_phb_type_info);
+type_register_static(_phb_root_port_info);
  }
-type_init(pnv_phb_register_type)
+
+type_init(pnv_phb_register_types)
diff --git a/hw/pci-host/pnv_phb.h b/hw/pci-host/pnv_phb.h
index a7cc8610e2..58ebd6dd0f 

Re: [PATCH v3 12/12] ppc/pnv: move attach_root_port helper to pnv-phb.c

2022-07-27 Thread Frederic Barrat




On 24/06/2022 10:49, Daniel Henrique Barboza wrote:

The helper is only used in this file.

Signed-off-by: Daniel Henrique Barboza 
---



Reviewed-by: Frederic Barrat 

  Fred



  hw/pci-host/pnv_phb.c | 24 
  hw/ppc/pnv.c  | 25 -
  include/hw/ppc/pnv.h  |  1 -
  3 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/hw/pci-host/pnv_phb.c b/hw/pci-host/pnv_phb.c
index cc15a949c9..c47ed92462 100644
--- a/hw/pci-host/pnv_phb.c
+++ b/hw/pci-host/pnv_phb.c
@@ -18,6 +18,30 @@
  #include "hw/qdev-properties.h"
  #include "qom/object.h"
  
+/*

+ * Attach a root port device.
+ *
+ * 'index' will be used both as a PCIE slot value and to calculate
+ * QOM id. 'chip_id' is going to be used as PCIE chassis for the
+ * root port.
+ */
+static void pnv_phb_attach_root_port(PCIHostState *pci, int index, int chip_id)
+{
+PCIDevice *root = pci_new(PCI_DEVFN(0, 0), TYPE_PNV_PHB_ROOT_PORT);
+g_autofree char *default_id = g_strdup_printf("%s[%d]",
+  TYPE_PNV_PHB_ROOT_PORT,
+  index);
+const char *dev_id = DEVICE(root)->id;
+
+object_property_add_child(OBJECT(pci->bus), dev_id ? dev_id : default_id,
+  OBJECT(root));
+
+/* Set unique chassis/slot values for the root port */
+qdev_prop_set_uint8(DEVICE(root), "chassis", chip_id);
+qdev_prop_set_uint16(DEVICE(root), "slot", index);
+
+pci_realize_and_unref(root, pci->bus, _fatal);
+}
  
  static void pnv_phb_realize(DeviceState *dev, Error **errp)

  {
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 5b7cbfc699..d649ed6b1b 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1192,31 +1192,6 @@ static void pnv_chip_icp_realize(Pnv8Chip *chip8, Error 
**errp)
  }
  }
  
-/*

- * Attach a root port device.
- *
- * 'index' will be used both as a PCIE slot value and to calculate
- * QOM id. 'chip_id' is going to be used as PCIE chassis for the
- * root port.
- */
-void pnv_phb_attach_root_port(PCIHostState *pci, int index, int chip_id)
-{
-PCIDevice *root = pci_new(PCI_DEVFN(0, 0), TYPE_PNV_PHB_ROOT_PORT);
-g_autofree char *default_id = g_strdup_printf("%s[%d]",
-  TYPE_PNV_PHB_ROOT_PORT,
-  index);
-const char *dev_id = DEVICE(root)->id;
-
-object_property_add_child(OBJECT(pci->bus), dev_id ? dev_id : default_id,
-  OBJECT(root));
-
-/* Set unique chassis/slot values for the root port */
-qdev_prop_set_uint8(DEVICE(root), "chassis", chip_id);
-qdev_prop_set_uint16(DEVICE(root), "slot", index);
-
-pci_realize_and_unref(root, pci->bus, _fatal);
-}
-
  static void pnv_chip_power8_realize(DeviceState *dev, Error **errp)
  {
  PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev);
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index fbad11d6a7..033d907287 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -190,7 +190,6 @@ DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER10,
   TYPE_PNV_CHIP_POWER10)
  
  PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip, uint32_t pir);

-void pnv_phb_attach_root_port(PCIHostState *pci, int index, int chip_id);
  
  #define TYPE_PNV_MACHINE   MACHINE_TYPE_NAME("powernv")

  typedef struct PnvMachineClass PnvMachineClass;




Re: [PATCH v3 08/12] ppc/pnv: remove pnv-phb4-root-port

2022-07-27 Thread Frederic Barrat




On 24/06/2022 10:49, Daniel Henrique Barboza wrote:

The unified pnv-phb-root-port can be used instead. The phb4-root-port
device isn't exposed to the user in any official QEMU release so there's
no ABI breakage in removing it.

Signed-off-by: Daniel Henrique Barboza 
---



Reviewed-by: Frederic Barrat 

  Fred



  hw/pci-host/pnv_phb.c  |  4 +-
  hw/pci-host/pnv_phb4.c | 85 --
  hw/pci-host/pnv_phb4_pec.c |  4 +-
  hw/ppc/pnv.c   |  2 +
  include/hw/pci-host/pnv_phb4.h |  9 
  5 files changed, 6 insertions(+), 98 deletions(-)

diff --git a/hw/pci-host/pnv_phb.c b/hw/pci-host/pnv_phb.c
index cdddc6a389..da729e89e7 100644
--- a/hw/pci-host/pnv_phb.c
+++ b/hw/pci-host/pnv_phb.c
@@ -38,11 +38,11 @@ static void pnv_phb_realize(DeviceState *dev, Error **errp)
  break;
  case 4:
  phb_typename = g_strdup(TYPE_PNV_PHB4);
-phb_rootport_typename = g_strdup(TYPE_PNV_PHB4_ROOT_PORT);
+phb_rootport_typename = g_strdup(TYPE_PNV_PHB_ROOT_PORT);
  break;
  case 5:
  phb_typename = g_strdup(TYPE_PNV_PHB5);
-phb_rootport_typename = g_strdup(TYPE_PNV_PHB5_ROOT_PORT);
+phb_rootport_typename = g_strdup(TYPE_PNV_PHB_ROOT_PORT);
  break;
  default:
  g_assert_not_reached();
diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 74cf62dc1a..fefdd3ad89 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -1741,94 +1741,9 @@ static const TypeInfo pnv_phb4_root_bus_info = {
  .class_init = pnv_phb4_root_bus_class_init,
  };
  
-static void pnv_phb4_root_port_reset(DeviceState *dev)

-{
-PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
-PCIDevice *d = PCI_DEVICE(dev);
-uint8_t *conf = d->config;
-
-rpc->parent_reset(dev);
-
-pci_byte_test_and_set_mask(conf + PCI_IO_BASE,
-   PCI_IO_RANGE_MASK & 0xff);
-pci_byte_test_and_clear_mask(conf + PCI_IO_LIMIT,
- PCI_IO_RANGE_MASK & 0xff);
-pci_set_word(conf + PCI_MEMORY_BASE, 0);
-pci_set_word(conf + PCI_MEMORY_LIMIT, 0xfff0);
-pci_set_word(conf + PCI_PREF_MEMORY_BASE, 0x1);
-pci_set_word(conf + PCI_PREF_MEMORY_LIMIT, 0xfff1);
-pci_set_long(conf + PCI_PREF_BASE_UPPER32, 0x1); /* Hack */
-pci_set_long(conf + PCI_PREF_LIMIT_UPPER32, 0x);
-pci_config_set_interrupt_pin(conf, 0);
-}
-
-static void pnv_phb4_root_port_realize(DeviceState *dev, Error **errp)
-{
-PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
-Error *local_err = NULL;
-
-rpc->parent_realize(dev, _err);
-if (local_err) {
-error_propagate(errp, local_err);
-return;
-}
-}
-
-static void pnv_phb4_root_port_class_init(ObjectClass *klass, void *data)
-{
-DeviceClass *dc = DEVICE_CLASS(klass);
-PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
-PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass);
-
-dc->desc = "IBM PHB4 PCIE Root Port";
-dc->user_creatable = false;
-
-device_class_set_parent_realize(dc, pnv_phb4_root_port_realize,
->parent_realize);
-device_class_set_parent_reset(dc, pnv_phb4_root_port_reset,
-  >parent_reset);
-
-k->vendor_id = PCI_VENDOR_ID_IBM;
-k->device_id = PNV_PHB4_DEVICE_ID;
-k->revision  = 0;
-
-rpc->exp_offset = 0x48;
-rpc->aer_offset = 0x100;
-
-dc->reset = _phb4_root_port_reset;
-}
-
-static const TypeInfo pnv_phb4_root_port_info = {
-.name  = TYPE_PNV_PHB4_ROOT_PORT,
-.parent= TYPE_PCIE_ROOT_PORT,
-.instance_size = sizeof(PnvPHB4RootPort),
-.class_init= pnv_phb4_root_port_class_init,
-};
-
-static void pnv_phb5_root_port_class_init(ObjectClass *klass, void *data)
-{
-DeviceClass *dc = DEVICE_CLASS(klass);
-PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
-
-dc->desc = "IBM PHB5 PCIE Root Port";
-dc->user_creatable = false;
-
-k->vendor_id = PCI_VENDOR_ID_IBM;
-k->device_id = PNV_PHB5_DEVICE_ID;
-}
-
-static const TypeInfo pnv_phb5_root_port_info = {
-.name  = TYPE_PNV_PHB5_ROOT_PORT,
-.parent= TYPE_PNV_PHB4_ROOT_PORT,
-.instance_size = sizeof(PnvPHB4RootPort),
-.class_init= pnv_phb5_root_port_class_init,
-};
-
  static void pnv_phb4_register_types(void)
  {
  type_register_static(_phb4_root_bus_info);
-type_register_static(_phb5_root_port_info);
-type_register_static(_phb4_root_port_info);
  type_register_static(_phb4_type_info);
  type_register_static(_phb5_type_info);
  type_register_static(_phb4_iommu_memory_region_info);
diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c
index 4a0a9fbe8b..0ef66b9a9b 100644
--- a/hw/pci-host/pnv_phb4_pec.c
+++ b/hw/pci-host/pnv_phb4_pec.c
@@ -260,7 +260,7 @@ static void pnv_pec_class_init(ObjectClass *klass, void 
*data)
  pecc->version = 

Re: [PATCH v3 11/12] ppc/pnv: remove PnvPHB4.version

2022-07-27 Thread Frederic Barrat




On 24/06/2022 10:49, Daniel Henrique Barboza wrote:

It's unused.

Signed-off-by: Daniel Henrique Barboza 
---



Reviewed-by: Frederic Barrat 

  Fred



  include/hw/pci-host/pnv_phb4.h | 2 --
  1 file changed, 2 deletions(-)

diff --git a/include/hw/pci-host/pnv_phb4.h b/include/hw/pci-host/pnv_phb4.h
index 61a0cb9989..20aa4819d3 100644
--- a/include/hw/pci-host/pnv_phb4.h
+++ b/include/hw/pci-host/pnv_phb4.h
@@ -77,8 +77,6 @@ struct PnvPHB4 {
  uint32_t chip_id;
  uint32_t phb_id;
  
-uint64_t version;

-
  /* The owner PEC */
  PnvPhb4PecState *pec;
  




Re: [PATCH] virtio-pci: don't touch pci on virtio reset

2022-07-27 Thread Claudio Fontana
Hmm, no that's not it,
still segfault in ovs, but I'll try to see if I can merge that and the 
workaround I am currently using...


On 7/27/22 19:14, Claudio Fontana wrote:
> testing it now...
> 
> Thanks!
> 
> C
> 
> 
> On 7/27/22 18:14, Michael S. Tsirkin wrote:
>> virtio level reset should not affect pci express
>> registers such as PM, error or link.
>>
>> Fixes: 27ce0f3afc ("hw/virtio: fix Power Management Control Register for PCI 
>> Express virtio devices")
>> Fixes: d584f1b9ca ("hw/virtio: fix Link Control Register for PCI Express 
>> virtio devices")
>> Fixes: c2cabb3422 ("hw/virtio: fix error enabling flags in Device Control 
>> register")
>> Cc: "Marcel Apfelbaum" 
>> Signed-off-by: Michael S. Tsirkin 
>> ---
>>  hw/virtio/virtio-pci.c | 9 -
>>  1 file changed, 8 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
>> index 45327f0b31..3189ec014d 100644
>> --- a/hw/virtio/virtio-pci.c
>> +++ b/hw/virtio/virtio-pci.c
>> @@ -1955,6 +1955,13 @@ static void virtio_pci_reset(DeviceState *qdev)
>>  proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
>>  proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0;
>>  }
>> +}
>> +
>> +static void virtio_pci_bus_reset(DeviceState *qdev)
>> +{
>> +PCIDevice *dev = PCI_DEVICE(qdev);
>> +
>> +virtio_pci_reset(qdev);
>>  
>>  if (pci_is_express(dev)) {
>>  pcie_cap_deverr_reset(dev);
>> @@ -2022,7 +2029,7 @@ static void virtio_pci_class_init(ObjectClass *klass, 
>> void *data)
>>  k->class_id = PCI_CLASS_OTHERS;
>>  device_class_set_parent_realize(dc, virtio_pci_dc_realize,
>>  >parent_dc_realize);
>> -dc->reset = virtio_pci_reset;
>> +dc->reset = virtio_pci_bus_reset;
>>  }
>>  
>>  static const TypeInfo virtio_pci_info = {
> 
> 




Re: [PATCH v3 02/12] ppc/pnv: add PnvPHB base/proxy device

2022-07-27 Thread Frederic Barrat




On 24/06/2022 10:49, Daniel Henrique Barboza wrote:

The PnvPHB device is going to be the base device for all other powernv
PHBs. It consists of a device that has the same user API as the other
PHB, namely being a PCIHostBridge and having chip-id and index
properties. It also has a 'backend' pointer that will be initialized
with the PHB implementation that the device is going to use.

The initialization of the PHB backend is done by checking the PHB
version via a 'version' attribute that can be set via a global machine
property.  The 'version' field will be used to make adjustments based on
the running version, e.g. PHB3 uses a 'chip' reference while PHB4 uses
'pec'. To init the PnvPHB bus we'll rely on helpers for each version.
The version 3 helper is already added (pnv_phb3_bus_init), the PHB4
helper will be added later on.

For now let's add the basic logic of the PnvPHB object, which consists
mostly of pnv_phb_realize() doing all the work of checking the
phb->version set, initializing the proper backend, passing through its
attributes to the chosen backend, finalizing the backend realize and
adding a root port in the end.

Signed-off-by: Daniel Henrique Barboza 
---



Reviewed-by: Frederic Barrat 

  Fred



  hw/pci-host/meson.build |   3 +-
  hw/pci-host/pnv_phb.c   | 124 
  hw/pci-host/pnv_phb.h   |  39 +
  3 files changed, 165 insertions(+), 1 deletion(-)
  create mode 100644 hw/pci-host/pnv_phb.c
  create mode 100644 hw/pci-host/pnv_phb.h

diff --git a/hw/pci-host/meson.build b/hw/pci-host/meson.build
index c07596d0d1..e832babc9d 100644
--- a/hw/pci-host/meson.build
+++ b/hw/pci-host/meson.build
@@ -35,5 +35,6 @@ specific_ss.add(when: 'CONFIG_PCI_POWERNV', if_true: files(
'pnv_phb3_msi.c',
'pnv_phb3_pbcq.c',
'pnv_phb4.c',
-  'pnv_phb4_pec.c'
+  'pnv_phb4_pec.c',
+  'pnv_phb.c',
  ))
diff --git a/hw/pci-host/pnv_phb.c b/hw/pci-host/pnv_phb.c
new file mode 100644
index 00..6fefff7d44
--- /dev/null
+++ b/hw/pci-host/pnv_phb.c
@@ -0,0 +1,124 @@
+/*
+ * QEMU PowerPC PowerNV Proxy PHB model
+ *
+ * Copyright (c) 2022, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/visitor.h"
+#include "qapi/error.h"
+#include "hw/pci-host/pnv_phb.h"
+#include "hw/pci-host/pnv_phb3.h"
+#include "hw/pci-host/pnv_phb4.h"
+#include "hw/ppc/pnv.h"
+#include "hw/qdev-properties.h"
+#include "qom/object.h"
+
+
+static void pnv_phb_realize(DeviceState *dev, Error **errp)
+{
+PnvPHB *phb = PNV_PHB(dev);
+PCIHostState *pci = PCI_HOST_BRIDGE(dev);
+g_autofree char *phb_typename = NULL;
+g_autofree char *phb_rootport_typename = NULL;
+
+if (!phb->version) {
+error_setg(errp, "version not specified");
+return;
+}
+
+switch (phb->version) {
+case 3:
+phb_typename = g_strdup(TYPE_PNV_PHB3);
+phb_rootport_typename = g_strdup(TYPE_PNV_PHB3_ROOT_PORT);
+break;
+case 4:
+phb_typename = g_strdup(TYPE_PNV_PHB4);
+phb_rootport_typename = g_strdup(TYPE_PNV_PHB4_ROOT_PORT);
+break;
+case 5:
+phb_typename = g_strdup(TYPE_PNV_PHB5);
+phb_rootport_typename = g_strdup(TYPE_PNV_PHB5_ROOT_PORT);
+break;
+default:
+g_assert_not_reached();
+}
+
+phb->backend = object_new(phb_typename);
+object_property_add_child(OBJECT(dev), "phb-backend", phb->backend);
+
+/* Passthrough child device properties to the proxy device */
+object_property_set_uint(phb->backend, "index", phb->phb_id, errp);
+object_property_set_uint(phb->backend, "chip-id", phb->chip_id, errp);
+object_property_set_link(phb->backend, "phb-base", OBJECT(phb), errp);
+
+if (phb->version == 3) {
+object_property_set_link(phb->backend, "chip",
+ OBJECT(phb->chip), errp);
+} else {
+object_property_set_link(phb->backend, "pec", OBJECT(phb->pec), errp);
+}
+
+if (!qdev_realize(DEVICE(phb->backend), NULL, errp)) {
+return;
+}
+
+if (phb->version == 3) {
+pnv_phb3_bus_init(dev, PNV_PHB3(phb->backend));
+}
+
+pnv_phb_attach_root_port(pci, phb_rootport_typename,
+ phb->phb_id, phb->chip_id);
+}
+
+static const char *pnv_phb_root_bus_path(PCIHostState *host_bridge,
+ PCIBus *rootbus)
+{
+PnvPHB *phb = PNV_PHB(host_bridge);
+
+snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x",
+ phb->chip_id, phb->phb_id);
+return phb->bus_path;
+}
+
+static Property pnv_phb_properties[] = {
+DEFINE_PROP_UINT32("index", PnvPHB, phb_id, 0),
+DEFINE_PROP_UINT32("chip-id", PnvPHB, chip_id, 0),
+DEFINE_PROP_UINT32("version", PnvPHB, version, 0),
+
+DEFINE_PROP_LINK("chip", PnvPHB, chip, TYPE_PNV_CHIP, PnvChip *),
+

Re: [PATCH v3 07/12] ppc/pnv: remove pnv-phb3-root-port

2022-07-27 Thread Frederic Barrat




On 24/06/2022 10:49, Daniel Henrique Barboza wrote:

The unified pnv-phb-root-port can be used in its place. There is no ABI
breakage in doing so because no official QEMU release introduced user
creatable pnv-phb3-root-port devices.

Signed-off-by: Daniel Henrique Barboza 
---



Reviewed-by: Frederic Barrat 

  Fred



  hw/pci-host/pnv_phb.c  |  2 +-
  hw/pci-host/pnv_phb3.c | 42 --
  hw/ppc/pnv.c   |  1 +
  include/hw/pci-host/pnv_phb3.h |  6 -
  4 files changed, 2 insertions(+), 49 deletions(-)

diff --git a/hw/pci-host/pnv_phb.c b/hw/pci-host/pnv_phb.c
index 5e61f85614..cdddc6a389 100644
--- a/hw/pci-host/pnv_phb.c
+++ b/hw/pci-host/pnv_phb.c
@@ -34,7 +34,7 @@ static void pnv_phb_realize(DeviceState *dev, Error **errp)
  switch (phb->version) {
  case 3:
  phb_typename = g_strdup(TYPE_PNV_PHB3);
-phb_rootport_typename = g_strdup(TYPE_PNV_PHB3_ROOT_PORT);
+phb_rootport_typename = g_strdup(TYPE_PNV_PHB_ROOT_PORT);
  break;
  case 4:
  phb_typename = g_strdup(TYPE_PNV_PHB4);
diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c
index ad5d67a8e8..2966374008 100644
--- a/hw/pci-host/pnv_phb3.c
+++ b/hw/pci-host/pnv_phb3.c
@@ -1122,51 +1122,9 @@ static const TypeInfo pnv_phb3_root_bus_info = {
  .class_init = pnv_phb3_root_bus_class_init,
  };
  
-static void pnv_phb3_root_port_realize(DeviceState *dev, Error **errp)

-{
-PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
-PCIDevice *pci = PCI_DEVICE(dev);
-Error *local_err = NULL;
-
-rpc->parent_realize(dev, _err);
-if (local_err) {
-error_propagate(errp, local_err);
-return;
-}
-pci_config_set_interrupt_pin(pci->config, 0);
-}
-
-static void pnv_phb3_root_port_class_init(ObjectClass *klass, void *data)
-{
-DeviceClass *dc = DEVICE_CLASS(klass);
-PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
-PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass);
-
-dc->desc = "IBM PHB3 PCIE Root Port";
-
-device_class_set_parent_realize(dc, pnv_phb3_root_port_realize,
->parent_realize);
-dc->user_creatable = false;
-
-k->vendor_id = PCI_VENDOR_ID_IBM;
-k->device_id = 0x03dc;
-k->revision  = 0;
-
-rpc->exp_offset = 0x48;
-rpc->aer_offset = 0x100;
-}
-
-static const TypeInfo pnv_phb3_root_port_info = {
-.name  = TYPE_PNV_PHB3_ROOT_PORT,
-.parent= TYPE_PCIE_ROOT_PORT,
-.instance_size = sizeof(PnvPHB3RootPort),
-.class_init= pnv_phb3_root_port_class_init,
-};
-
  static void pnv_phb3_register_types(void)
  {
  type_register_static(_phb3_root_bus_info);
-type_register_static(_phb3_root_port_info);
  type_register_static(_phb3_type_info);
  type_register_static(_phb3_iommu_memory_region_info);
  }
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index b7273f386e..d82c66ca6f 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -2108,6 +2108,7 @@ static void pnv_machine_power8_class_init(ObjectClass 
*oc, void *data)
  
  static GlobalProperty phb_compat[] = {

  { TYPE_PNV_PHB, "version", "3" },
+{ TYPE_PNV_PHB_ROOT_PORT, "version", "3" },
  };
  
  mc->desc = "IBM PowerNV (Non-Virtualized) POWER8";

diff --git a/include/hw/pci-host/pnv_phb3.h b/include/hw/pci-host/pnv_phb3.h
index 3b9ff1096a..bff69201d9 100644
--- a/include/hw/pci-host/pnv_phb3.h
+++ b/include/hw/pci-host/pnv_phb3.h
@@ -108,12 +108,6 @@ struct PnvPBCQState {
   */
  #define TYPE_PNV_PHB3_ROOT_BUS "pnv-phb3-root"
  
-#define TYPE_PNV_PHB3_ROOT_PORT "pnv-phb3-root-port"

-
-typedef struct PnvPHB3RootPort {
-PCIESlot parent_obj;
-} PnvPHB3RootPort;
-
  /*
   * PHB3 PCIe Host Bridge for PowerNV machines (POWER8)
   */




Re: [PATCH v3 04/12] ppc/pnv: add PHB4 bus init helper

2022-07-27 Thread Frederic Barrat




On 24/06/2022 10:49, Daniel Henrique Barboza wrote:

Similar to what we already did for the PnvPHB3 device, let's add a
helper to init the bus when using a PnvPHB4. This helper will be used by
PnvPHb when PnvPHB4 turns into a backend.

Signed-off-by: Daniel Henrique Barboza 
---



Reviewed-by: Frederic Barrat 

  Fred



  hw/pci-host/pnv_phb.c  |  2 ++
  hw/pci-host/pnv_phb4.c | 39 --
  include/hw/pci-host/pnv_phb4.h |  1 +
  3 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/hw/pci-host/pnv_phb.c b/hw/pci-host/pnv_phb.c
index 6fefff7d44..abcbcca445 100644
--- a/hw/pci-host/pnv_phb.c
+++ b/hw/pci-host/pnv_phb.c
@@ -69,6 +69,8 @@ static void pnv_phb_realize(DeviceState *dev, Error **errp)
  
  if (phb->version == 3) {

  pnv_phb3_bus_init(dev, PNV_PHB3(phb->backend));
+} else {
+pnv_phb4_bus_init(dev, PNV_PHB4(phb->backend));
  }
  
  pnv_phb_attach_root_port(pci, phb_rootport_typename,

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index d225ab5b0f..a7a4519f30 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -1544,30 +1544,16 @@ static void pnv_phb4_instance_init(Object *obj)
  object_initialize_child(obj, "source", >xsrc, TYPE_XIVE_SOURCE);
  }
  
-static void pnv_phb4_realize(DeviceState *dev, Error **errp)

+void pnv_phb4_bus_init(DeviceState *dev, PnvPHB4 *phb)
  {
-PnvPHB4 *phb = PNV_PHB4(dev);
-PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(phb->pec);
  PCIHostState *pci = PCI_HOST_BRIDGE(dev);
-XiveSource *xsrc = >xsrc;
-int nr_irqs;
  char name[32];
  
-/* Set the "big_phb" flag */

-phb->big_phb = phb->phb_id == 0 || phb->phb_id == 3;
-
-/* Controller Registers */
-snprintf(name, sizeof(name), "phb4-%d.%d-regs", phb->chip_id,
- phb->phb_id);
-memory_region_init_io(>mr_regs, OBJECT(phb), _phb4_reg_ops, phb,
-  name, 0x2000);
-
  /*
   * PHB4 doesn't support IO space. However, qemu gets very upset if
   * we don't have an IO region to anchor IO BARs onto so we just
   * initialize one which we never hook up to anything
   */
-
  snprintf(name, sizeof(name), "phb4-%d.%d-pci-io", phb->chip_id,
   phb->phb_id);
  memory_region_init(>pci_io, OBJECT(phb), name, 0x1);
@@ -1577,12 +1563,33 @@ static void pnv_phb4_realize(DeviceState *dev, Error 
**errp)
  memory_region_init(>pci_mmio, OBJECT(phb), name,
 PCI_MMIO_TOTAL_SIZE);
  
-pci->bus = pci_register_root_bus(dev, dev->id,

+pci->bus = pci_register_root_bus(dev, dev->id ? dev->id : NULL,
   pnv_phb4_set_irq, pnv_phb4_map_irq, phb,
   >pci_mmio, >pci_io,
   0, 4, TYPE_PNV_PHB4_ROOT_BUS);
  pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb);
  pci->bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
+}
+
+static void pnv_phb4_realize(DeviceState *dev, Error **errp)
+{
+PnvPHB4 *phb = PNV_PHB4(dev);
+PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(phb->pec);
+PCIHostState *pci = PCI_HOST_BRIDGE(dev);
+XiveSource *xsrc = >xsrc;
+int nr_irqs;
+char name[32];
+
+/* Set the "big_phb" flag */
+phb->big_phb = phb->phb_id == 0 || phb->phb_id == 3;
+
+/* Controller Registers */
+snprintf(name, sizeof(name), "phb4-%d.%d-regs", phb->chip_id,
+ phb->phb_id);
+memory_region_init_io(>mr_regs, OBJECT(phb), _phb4_reg_ops, phb,
+  name, 0x2000);
+
+pnv_phb4_bus_init(dev, phb);
  
  /* Add a single Root port if running with defaults */

  pnv_phb_attach_root_port(pci, pecc->rp_model,
diff --git a/include/hw/pci-host/pnv_phb4.h b/include/hw/pci-host/pnv_phb4.h
index 19dcbd6f87..90843ac3a9 100644
--- a/include/hw/pci-host/pnv_phb4.h
+++ b/include/hw/pci-host/pnv_phb4.h
@@ -157,6 +157,7 @@ struct PnvPHB4 {
  
  void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon);

  int pnv_phb4_pec_get_phb_id(PnvPhb4PecState *pec, int stack_index);
+void pnv_phb4_bus_init(DeviceState *dev, PnvPHB4 *phb);
  extern const MemoryRegionOps pnv_phb4_xscom_ops;
  
  /*




Re: [PATCH v3 01/12] ppc/pnv: add PHB3 bus init helper

2022-07-27 Thread Frederic Barrat




On 24/06/2022 10:49, Daniel Henrique Barboza wrote:

The PnvPHB3 bus init consists of initializing the pci_io and pci_mmio
regions, registering it via pci_register_root_bus() and then setup the
iommu.

We'll want to init the bus from outside pnv_phb3.c when the bus is
removed from the PnvPHB3 device and put into a new parent PnvPHB device.
The new pnv_phb3_bus_init() helper will be used by the parent to init
the bus when using the PHB3 backend.

Signed-off-by: Daniel Henrique Barboza 
---


Reviewed-by: Frederic Barrat 

  Fred


  hw/pci-host/pnv_phb3.c | 39 --
  include/hw/pci-host/pnv_phb3.h |  1 +
  2 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c
index d58d3c1701..058cbab555 100644
--- a/hw/pci-host/pnv_phb3.c
+++ b/hw/pci-host/pnv_phb3.c
@@ -986,6 +986,28 @@ static void pnv_phb3_instance_init(Object *obj)
  
  }
  
+void pnv_phb3_bus_init(DeviceState *dev, PnvPHB3 *phb)

+{
+PCIHostState *pci = PCI_HOST_BRIDGE(dev);
+
+/*
+ * PHB3 doesn't support IO space. However, qemu gets very upset if
+ * we don't have an IO region to anchor IO BARs onto so we just
+ * initialize one which we never hook up to anything
+ */
+memory_region_init(>pci_io, OBJECT(phb), "pci-io", 0x1);
+memory_region_init(>pci_mmio, OBJECT(phb), "pci-mmio",
+   PCI_MMIO_TOTAL_SIZE);
+
+pci->bus = pci_register_root_bus(dev,
+ dev->id ? dev->id : NULL,
+ pnv_phb3_set_irq, pnv_phb3_map_irq, phb,
+ >pci_mmio, >pci_io,
+ 0, 4, TYPE_PNV_PHB3_ROOT_BUS);
+
+pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb);
+}
+
  static void pnv_phb3_realize(DeviceState *dev, Error **errp)
  {
  PnvPHB3 *phb = PNV_PHB3(dev);
@@ -1035,22 +1057,7 @@ static void pnv_phb3_realize(DeviceState *dev, Error 
**errp)
  memory_region_init_io(>mr_regs, OBJECT(phb), _phb3_reg_ops, phb,
"phb3-regs", 0x1000);
  
-/*

- * PHB3 doesn't support IO space. However, qemu gets very upset if
- * we don't have an IO region to anchor IO BARs onto so we just
- * initialize one which we never hook up to anything
- */
-memory_region_init(>pci_io, OBJECT(phb), "pci-io", 0x1);
-memory_region_init(>pci_mmio, OBJECT(phb), "pci-mmio",
-   PCI_MMIO_TOTAL_SIZE);
-
-pci->bus = pci_register_root_bus(dev,
- dev->id ? dev->id : NULL,
- pnv_phb3_set_irq, pnv_phb3_map_irq, phb,
- >pci_mmio, >pci_io,
- 0, 4, TYPE_PNV_PHB3_ROOT_BUS);
-
-pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb);
+pnv_phb3_bus_init(dev, phb);
  
  pnv_phb_attach_root_port(pci, TYPE_PNV_PHB3_ROOT_PORT,

   phb->phb_id, phb->chip_id);
diff --git a/include/hw/pci-host/pnv_phb3.h b/include/hw/pci-host/pnv_phb3.h
index af6ec83cf6..1375f18fc1 100644
--- a/include/hw/pci-host/pnv_phb3.h
+++ b/include/hw/pci-host/pnv_phb3.h
@@ -164,5 +164,6 @@ uint64_t pnv_phb3_reg_read(void *opaque, hwaddr off, 
unsigned size);
  void pnv_phb3_reg_write(void *opaque, hwaddr off, uint64_t val, unsigned 
size);
  void pnv_phb3_update_regions(PnvPHB3 *phb);
  void pnv_phb3_remap_irqs(PnvPHB3 *phb);
+void pnv_phb3_bus_init(DeviceState *dev, PnvPHB3 *phb);
  
  #endif /* PCI_HOST_PNV_PHB3_H */




Re: [PATCH v3 03/12] ppc/pnv: turn PnvPHB3 into a PnvPHB backend

2022-07-27 Thread Frederic Barrat




On 24/06/2022 10:49, Daniel Henrique Barboza wrote:

We need a handful of changes that needs to be done in a single swoop to
turn PnvPHB3 into a PnvPHB backend.

In the PnvPHB3, since the PnvPHB device implements PCIExpressHost and
will hold the PCI bus, change PnvPHB3 parent to TYPE_DEVICE. There are a
couple of instances in pnv_phb3.c that needs to access the PCI bus, so a
phb_base pointer is added to allow access to the parent PnvPHB. The
PnvPHB3 root port will now be connected to a PnvPHB object.

In pnv.c, the powernv8 machine chip8 will now hold an array of PnvPHB
objects.  pnv_get_phb3_child() needs to be adapted to return the PnvPHB3
backend from the PnvPHB child. A global property is added in
pnv_machine_power8_class_init() to ensure that all PnvPHBs are created
with phb->version = 3.

After all these changes we're still able to boot a powernv8 machine with
default settings. The real gain will come with user created PnvPHB
devices, coming up next.

Signed-off-by: Daniel Henrique Barboza 
---



A very minor indentation issue below, but other than that:
Reviewed-by: Frederic Barrat 



  hw/pci-host/pnv_phb3.c | 27 +--
  hw/ppc/pnv.c   | 21 +++--
  include/hw/pci-host/pnv_phb3.h |  5 -
  include/hw/ppc/pnv.h   |  3 ++-
  4 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c
index 058cbab555..ad5d67a8e8 100644
--- a/hw/pci-host/pnv_phb3.c
+++ b/hw/pci-host/pnv_phb3.c
@@ -11,6 +11,7 @@
  #include "qapi/visitor.h"
  #include "qapi/error.h"
  #include "hw/pci-host/pnv_phb3_regs.h"
+#include "hw/pci-host/pnv_phb.h"
  #include "hw/pci-host/pnv_phb3.h"
  #include "hw/pci/pcie_host.h"
  #include "hw/pci/pcie_port.h"
@@ -26,7 +27,7 @@
  
  static PCIDevice *pnv_phb3_find_cfg_dev(PnvPHB3 *phb)

  {
-PCIHostState *pci = PCI_HOST_BRIDGE(phb);
+PCIHostState *pci = PCI_HOST_BRIDGE(phb->phb_base);
  uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3];
  uint8_t bus, devfn;
  
@@ -590,7 +591,7 @@ void pnv_phb3_reg_write(void *opaque, hwaddr off, uint64_t val, unsigned size)

  uint64_t pnv_phb3_reg_read(void *opaque, hwaddr off, unsigned size)
  {
  PnvPHB3 *phb = opaque;
-PCIHostState *pci = PCI_HOST_BRIDGE(phb);
+PCIHostState *pci = PCI_HOST_BRIDGE(phb->phb_base);
  uint64_t val;
  
  if ((off & 0xfffc) == PHB_CONFIG_DATA) {

@@ -1011,7 +1012,6 @@ void pnv_phb3_bus_init(DeviceState *dev, PnvPHB3 *phb)
  static void pnv_phb3_realize(DeviceState *dev, Error **errp)
  {
  PnvPHB3 *phb = PNV_PHB3(dev);
-PCIHostState *pci = PCI_HOST_BRIDGE(dev);
  PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
  int i;
  
@@ -1056,11 +1056,6 @@ static void pnv_phb3_realize(DeviceState *dev, Error **errp)

  /* Controller Registers */
  memory_region_init_io(>mr_regs, OBJECT(phb), _phb3_reg_ops, phb,
"phb3-regs", 0x1000);
-
-pnv_phb3_bus_init(dev, phb);
-
-pnv_phb_attach_root_port(pci, TYPE_PNV_PHB3_ROOT_PORT,
- phb->phb_id, phb->chip_id);
  }
  
  void pnv_phb3_update_regions(PnvPHB3 *phb)

@@ -1085,38 +1080,26 @@ void pnv_phb3_update_regions(PnvPHB3 *phb)
  pnv_phb3_check_all_m64s(phb);
  }
  
-static const char *pnv_phb3_root_bus_path(PCIHostState *host_bridge,

-  PCIBus *rootbus)
-{
-PnvPHB3 *phb = PNV_PHB3(host_bridge);
-
-snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x",
- phb->chip_id, phb->phb_id);
-return phb->bus_path;
-}
-
  static Property pnv_phb3_properties[] = {
  DEFINE_PROP_UINT32("index", PnvPHB3, phb_id, 0),
  DEFINE_PROP_UINT32("chip-id", PnvPHB3, chip_id, 0),
  DEFINE_PROP_LINK("chip", PnvPHB3, chip, TYPE_PNV_CHIP, PnvChip *),
+DEFINE_PROP_LINK("phb-base", PnvPHB3, phb_base, TYPE_PNV_PHB, PnvPHB *),
  DEFINE_PROP_END_OF_LIST(),
  };
  
  static void pnv_phb3_class_init(ObjectClass *klass, void *data)

  {
-PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
  DeviceClass *dc = DEVICE_CLASS(klass);
  
-hc->root_bus_path = pnv_phb3_root_bus_path;

  dc->realize = pnv_phb3_realize;
  device_class_set_props(dc, pnv_phb3_properties);
-set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
  dc->user_creatable = false;
  }
  
  static const TypeInfo pnv_phb3_type_info = {

  .name  = TYPE_PNV_PHB3,
-.parent= TYPE_PCIE_HOST_BRIDGE,
+.parent = TYPE_DEVICE,


Missing spaces here ^^
  Fred



  .instance_size = sizeof(PnvPHB3),
  .class_init= pnv_phb3_class_init,
  .instance_init = pnv_phb3_instance_init,
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index d3f77c8367..1df91971b8 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -43,6 +43,7 @@
  #include "hw/ipmi/ipmi.h"
  #include "target/ppc/mmu-hash64.h"
  #include "hw/pci/msi.h"
+#include "hw/pci-host/pnv_phb.h"
  
  #include "hw/ppc/xics.h"

  

Re: [PATCH v3 00/12] powernv: introduce pnv-phb base/proxy devices

2022-07-27 Thread Frederic Barrat




On 24/06/2022 10:49, Daniel Henrique Barboza wrote:

Hi,

This is the version 3 of the pnv-phb proxy device which has the
following main differences from v2:

- it's rebased on top of "[PATCH v3 0/8] pnv-phb related cleanups"
- it doesn't have any patches related to user-created devices

There is no user visible change made here yet. We're making device
changes that are effective using default settings.

Changes from v2:
- all related changes made with the rebase on top of "[PATCH v3 0/8]
pnv-phb related cleanups"
- the following user devices patches were removed:
   - ppc/pnv: user created pnv-phb for powernv8
   - ppc/pnv: user created pnv-phb for powernv9
   - ppc/pnv: change pnv_phb4_get_pec() to also retrieve chip10->pecs
   - ppc/pnv: user creatable pnv-phb for powernv10
- v2 link: https://lists.gnu.org/archive/html/qemu-devel/2022-05/msg06254.html



This series look pretty good to me! I only have a couple of minor 
comments, which I don't think are worth a resend.


  Fred




Daniel Henrique Barboza (12):
   ppc/pnv: add PHB3 bus init helper
   ppc/pnv: add PnvPHB base/proxy device
   ppc/pnv: turn PnvPHB3 into a PnvPHB backend
   ppc/pnv: add PHB4 bus init helper
   ppc/pnv: turn PnvPHB4 into a PnvPHB backend
   ppc/pnv: add pnv-phb-root-port device
   ppc/pnv: remove pnv-phb3-root-port
   ppc/pnv: remove pnv-phb4-root-port
   ppc/pnv: remove root port name from pnv_phb_attach_root_port()
   ppc/pnv: remove pecc->rp_model
   ppc/pnv: remove PnvPHB4.version
   ppc/pnv: move attach_root_port helper to pnv-phb.c

  hw/pci-host/meson.build|   3 +-
  hw/pci-host/pnv_phb.c  | 244 +
  hw/pci-host/pnv_phb.h  |  55 
  hw/pci-host/pnv_phb3.c | 106 --
  hw/pci-host/pnv_phb4.c | 144 ---
  hw/pci-host/pnv_phb4_pec.c |   5 +-
  hw/ppc/pnv.c   |  68 -
  include/hw/pci-host/pnv_phb3.h |  12 +-
  include/hw/pci-host/pnv_phb4.h |  18 +--
  include/hw/ppc/pnv.h   |   5 +-
  10 files changed, 401 insertions(+), 259 deletions(-)
  create mode 100644 hw/pci-host/pnv_phb.c
  create mode 100644 hw/pci-host/pnv_phb.h





Re: [PATCH] contrib/vhost-user-blk: Clean up deallocation of VuVirtqElement

2022-07-27 Thread Raphael Norwitz
On Tue, Jul 26, 2022 at 03:57:42PM +0100, Peter Maydell wrote:
> On Fri, 1 Jul 2022 at 06:41, Markus Armbruster  wrote:
> > Could we use a contrib/README with an explanation what "contrib" means,
> > and how to build and use the stuff there?
> 
> I would rather we got rid of contrib/ entirely. Our git repo
> should contain things we care about enough to really support
> and believe in, in which case they should be in top level
> directories matching what they are (eg tools/). If we don't
> believe in these things enough to really support them, then
> we should drop them, and let those who do care maintain them
> as out-of-tree tools if they like.
>

I can't speak for a lot of stuff in contrib/ but I find the vhost-user
backends like vhost-user-blk and vhost-user-scsi helpful for testing and
development. I would like to keep maintaining those two at least.

> subprojects/ is similarly vague.
> 

Again, I can't say much for other stuff in subprojects/ but
libvhost-user is clearly important. Maybe we move libvhost-user to
another directroy and the libvhost-user based backends there too?

> thanks
> -- PMM


Re: [PATCH 1/8] tests/docker: Fix alpine dockerfile

2022-07-27 Thread Lucas Mateus Martins Araujo e Castro


On 27/07/2022 14:09, Daniel P. Berrangé wrote:


On Wed, Jul 27, 2022 at 01:36:25PM -0300, Lucas Mateus Castro(alqotel) wrote:

Currently the run script uses 'readlink -e' but the image only has the
busybox readlink, this commit add the coreutils package which
contains the readlink with the '-e' option.

Use of 'readlink' is discouraged in favour of 'realpath'. AFAICT, we
can just do that change and not need the '-e' flag anyway.

So a patch just changing
-BASE="$(dirname $(readlink -e $0))"
+BASE="$(dirname $(realpath $0))"

Ok, I'll send it with v2.




Signed-off-by: Lucas Mateus Castro(alqotel)
---
  tests/docker/dockerfiles/alpine.docker | 1 +
  1 file changed, 1 insertion(+)

diff --git a/tests/docker/dockerfiles/alpine.docker 
b/tests/docker/dockerfiles/alpine.docker
index 3f4c0f95cb..2943a99730 100644
--- a/tests/docker/dockerfiles/alpine.docker
+++ b/tests/docker/dockerfiles/alpine.docker
@@ -21,6 +21,7 @@ RUN apk update && \
  cdrkit \
  ceph-dev \
  clang \
+coreutils \
  ctags \
  curl-dev \
  cyrus-sasl-dev \

This file contents is autogenerated, so editting it manually is
wrong and changes will be lost.


True, that was one of the reasons I had problems with patch 8 (it 
changes the dockerfiles directly) but forgot that it'd apply here as well.


Thanks,
--
Lucas Mateus M. Araujo e Castro
Instituto de Pesquisas ELDORADO 


Departamento Computação Embarcada
Analista de Software Trainee
Aviso Legal - Disclaimer 


Re: [PATCH] virtio-pci: don't touch pci on virtio reset

2022-07-27 Thread Claudio Fontana
testing it now...

Thanks!

C


On 7/27/22 18:14, Michael S. Tsirkin wrote:
> virtio level reset should not affect pci express
> registers such as PM, error or link.
> 
> Fixes: 27ce0f3afc ("hw/virtio: fix Power Management Control Register for PCI 
> Express virtio devices")
> Fixes: d584f1b9ca ("hw/virtio: fix Link Control Register for PCI Express 
> virtio devices")
> Fixes: c2cabb3422 ("hw/virtio: fix error enabling flags in Device Control 
> register")
> Cc: "Marcel Apfelbaum" 
> Signed-off-by: Michael S. Tsirkin 
> ---
>  hw/virtio/virtio-pci.c | 9 -
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 45327f0b31..3189ec014d 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1955,6 +1955,13 @@ static void virtio_pci_reset(DeviceState *qdev)
>  proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
>  proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0;
>  }
> +}
> +
> +static void virtio_pci_bus_reset(DeviceState *qdev)
> +{
> +PCIDevice *dev = PCI_DEVICE(qdev);
> +
> +virtio_pci_reset(qdev);
>  
>  if (pci_is_express(dev)) {
>  pcie_cap_deverr_reset(dev);
> @@ -2022,7 +2029,7 @@ static void virtio_pci_class_init(ObjectClass *klass, 
> void *data)
>  k->class_id = PCI_CLASS_OTHERS;
>  device_class_set_parent_realize(dc, virtio_pci_dc_realize,
>  >parent_dc_realize);
> -dc->reset = virtio_pci_reset;
> +dc->reset = virtio_pci_bus_reset;
>  }
>  
>  static const TypeInfo virtio_pci_info = {




Re: [PATCH] block/io_uring: add missing include file

2022-07-27 Thread Kevin Wolf
Am 21.07.2022 um 09:09 hat Stefano Garzarella geschrieben:
> On Thu, Jul 21, 2022 at 02:56:45PM +0800, Jinhao Fan wrote:
> > The commit "Use io_uring_register_ring_fd() to skip fd operations" uses
> > warn_report but did not include the header file "qemu/error-report.h".
> > This causes "error: implicit declaration of function ‘warn_report’".
> > Include this header file.
> > 
> 
> We could add:
> 
> Fixes: e2848bc574 ("Use io_uring_register_ring_fd() to skip fd operations")
> 
> > Signed-off-by: Jinhao Fan 
> > ---
> > block/io_uring.c | 1 +
> > 1 file changed, 1 insertion(+)
> > 
> > diff --git a/block/io_uring.c b/block/io_uring.c
> > index f8a19fd97f..a1760152e0 100644
> > --- a/block/io_uring.c
> > +++ b/block/io_uring.c
> > @@ -11,6 +11,7 @@
> > #include "qemu/osdep.h"
> > #include 
> > #include "block/aio.h"
> > +#include "qemu/error-report.h"
> > #include "qemu/queue.h"
> > #include "block/block.h"
> > #include "block/raw-aio.h"
> > -- 
> > 2.25.1
> > 
> > 
> 
> Thanks for the fix:
> 
> Reviewed-by: Stefano Garzarella 

Thanks, applied to the block branch.

Kevin




Re: [PATCH 1/8] tests/docker: Fix alpine dockerfile

2022-07-27 Thread Daniel P . Berrangé
On Wed, Jul 27, 2022 at 01:36:25PM -0300, Lucas Mateus Castro(alqotel) wrote:
> Currently the run script uses 'readlink -e' but the image only has the
> busybox readlink, this commit add the coreutils package which
> contains the readlink with the '-e' option.

Use of 'readlink' is discouraged in favour of 'realpath'. AFAICT, we
can just do that change and not need the '-e' flag anyway.

> 
> Signed-off-by: Lucas Mateus Castro(alqotel) 
> ---
>  tests/docker/dockerfiles/alpine.docker | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/tests/docker/dockerfiles/alpine.docker 
> b/tests/docker/dockerfiles/alpine.docker
> index 3f4c0f95cb..2943a99730 100644
> --- a/tests/docker/dockerfiles/alpine.docker
> +++ b/tests/docker/dockerfiles/alpine.docker
> @@ -21,6 +21,7 @@ RUN apk update && \
>  cdrkit \
>  ceph-dev \
>  clang \
> +coreutils \
>  ctags \
>  curl-dev \
>  cyrus-sasl-dev \

This file contents is autogenerated, so editting it manually is
wrong and changes will be lost.


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[RFC PATCH 8/8] tests/docker: Selective line reading by python script

2022-07-27 Thread Lucas Mateus Castro(alqotel)
Building some images failed on ppc64le because the dockerfile tried to
install some packages that are only available in x86 and arm64, to solve
this while still having those packages be available in those architectures
a comment was put before the installation command to instruct the python
script into ignoring those lines for some architectures (in this case
ppc64le)

Overall I'm not a big fan of the way I solved this problem, so I'd like
to know if anyone has a better way to make these dockerfilse work in
PPC64LE.

For context the base images used here are available in PPC64LE but some
of the packages installed are not (in alpine's case it's XEN, which is
only available to x86 and ARM), so this patch create a ignore_list which
is set on a per-architecture basis, and any packages in a dockerfile in
this ignore_list will not be copied to the temporary dockerfile used in
the docker command.

Signed-off-by: Lucas Mateus Castro(alqotel) 
---
 tests/docker/docker.py | 15 ---
 tests/docker/dockerfiles/alpine.docker |  2 ++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/tests/docker/docker.py b/tests/docker/docker.py
index d0af2861b8..9b962d1c78 100755
--- a/tests/docker/docker.py
+++ b/tests/docker/docker.py
@@ -14,6 +14,7 @@
 import os
 import sys
 import subprocess
+import platform
 import json
 import hashlib
 import atexit
@@ -207,8 +208,15 @@ def _read_qemu_dockerfile(img_name):
 
 def _dockerfile_preprocess(df):
 out = ""
+ignore_list = []
 for l in df.splitlines():
-if len(l.strip()) == 0 or l.startswith("#"):
+if len(l.strip()) == 0:
+continue
+if l.startswith("#"):
+if len(l.split()) >= 3:
+if l.split()[1] == "ignore":
+if platform.processor() in l.split()[2].split(','):
+ignore_list += l.split()[3].split(',')
 continue
 from_pref = "FROM qemu/"
 if l.startswith(from_pref):
@@ -219,7 +227,8 @@ def _dockerfile_preprocess(df):
 inlining = _read_qemu_dockerfile(l[len(from_pref):])
 out += _dockerfile_preprocess(inlining)
 continue
-out += l + "\n"
+if not any(x in l.split() for x in ignore_list):
+out += l + "\n"
 return out
 
 
@@ -330,7 +339,7 @@ def build_image(self, tag, docker_dir, dockerfile,
 tmp_df = tempfile.NamedTemporaryFile(mode="w+t",
  encoding='utf-8',
  dir=docker_dir, suffix=".docker")
-tmp_df.write(dockerfile)
+tmp_df.write(_dockerfile_preprocess(dockerfile))
 
 if user:
 uid = os.getuid()
diff --git a/tests/docker/dockerfiles/alpine.docker 
b/tests/docker/dockerfiles/alpine.docker
index 2943a99730..5cec46d8f2 100644
--- a/tests/docker/dockerfiles/alpine.docker
+++ b/tests/docker/dockerfiles/alpine.docker
@@ -6,6 +6,8 @@
 
 FROM docker.io/library/alpine:edge
 
+# Lines to by ignored when this file is read by the python script
+# ignore ppc64le,ppc64 xen-dev
 RUN apk update && \
 apk upgrade && \
 apk add \
-- 
2.25.1




[PATCH 6/8] scripts/ci/setup: Add Fedora to build-environment.yml

2022-07-27 Thread Lucas Mateus Castro(alqotel)
Minicloud doesn't have a RHEL image, but it does have Fedora 34 and 35
images and both use DNF as package manager, so just change the ansible facts
to check if it's RHEL or Fedora

Signed-off-by: Lucas Mateus Castro(alqotel) 
---
 scripts/ci/setup/build-environment.yml | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/scripts/ci/setup/build-environment.yml 
b/scripts/ci/setup/build-environment.yml
index 43cf8c759f..a7d53d0f70 100644
--- a/scripts/ci/setup/build-environment.yml
+++ b/scripts/ci/setup/build-environment.yml
@@ -165,8 +165,10 @@
   - zlib-devel
 state: present
   when:
-- ansible_facts['distribution_file_variety'] == 'RedHat'
-- ansible_facts['distribution_version'] == '8'
+- |
+   (ansible_facts['distribution'] == 'RedHat' and
+ansible_facts['distribution_version'] == '8') or
+ansible_facts['distribution'] == 'Fedora'
 
 - name: Install packages only available on x86 and aarch64
   dnf:
@@ -175,6 +177,8 @@
   - spice-server
 state: present
   when:
-- ansible_facts['distribution_file_variety'] == 'RedHat'
-- ansible_facts['distribution_version'] == '8'
+- |
+   (ansible_facts['distribution'] == 'RedHat' and
+ansible_facts['distribution_version'] == '8') or
+ansible_facts['distribution'] == 'Fedora'
 - ansible_facts['architecture'] == 'aarch64' or 
ansible_facts['architecture'] == 'x86_64'
-- 
2.25.1




[PATCH 2/8] scripts/ci/setup: ninja missing from build-environment

2022-07-27 Thread Lucas Mateus Castro(alqotel)
ninja-build is missing from the RHEL environment, so a system prepared
with that script would still fail to compile QEMU.
Tested on a Fedora 36

Signed-off-by: Lucas Mateus Castro(alqotel) 
---
 scripts/ci/setup/build-environment.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/ci/setup/build-environment.yml 
b/scripts/ci/setup/build-environment.yml
index 232525b91d..6df3e61d94 100644
--- a/scripts/ci/setup/build-environment.yml
+++ b/scripts/ci/setup/build-environment.yml
@@ -150,6 +150,7 @@
   - libepoxy-devel
   - libgcrypt-devel
   - lzo-devel
+  - ninja-build
   - make
   - mesa-libEGL-devel
   - nettle-devel
-- 
2.25.1




[PATCH 3/8] scripts/ci/setup: Fix libxen requirements

2022-07-27 Thread Lucas Mateus Castro(alqotel)
XEN hypervisor is only available in ARM and x86, but the yaml only
checked if the architecture is different from s390x, changed it to
a more accurate test.
Tested this change on a Ubuntu 20.04 ppc64le.

Signed-off-by: Lucas Mateus Castro(alqotel) 
---
 scripts/ci/setup/build-environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/ci/setup/build-environment.yml 
b/scripts/ci/setup/build-environment.yml
index 6df3e61d94..7535228685 100644
--- a/scripts/ci/setup/build-environment.yml
+++ b/scripts/ci/setup/build-environment.yml
@@ -97,7 +97,7 @@
 state: present
   when:
 - ansible_facts['distribution'] == 'Ubuntu'
-- ansible_facts['architecture'] != 's390x'
+- ansible_facts['architecture'] == 'aarch64' or 
ansible_facts['architecture'] == 'x86_64'
 
 - name: Install basic packages to build QEMU on Ubuntu 20.04
   package:
-- 
2.25.1




[PATCH 7/8] scripts/ci/setup: Added debian to build-environment.yml

2022-07-27 Thread Lucas Mateus Castro(alqotel)
Minicloud has a PPC64 BE Debian11 image which can be used for the CI,
so add Debian to the build-environment.yml so it can be configured with
ansible-playbook.

Signed-off-by: Lucas Mateus Castro(alqotel) 
---
 scripts/ci/setup/build-environment.yml | 31 +-
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/scripts/ci/setup/build-environment.yml 
b/scripts/ci/setup/build-environment.yml
index a7d53d0f70..b5d415496f 100644
--- a/scripts/ci/setup/build-environment.yml
+++ b/scripts/ci/setup/build-environment.yml
@@ -31,9 +31,11 @@
 update_cache: yes
 upgrade: yes
   when:
-- ansible_facts['distribution'] == 'Ubuntu'
+- |
+ansible_facts['distribution'] == 'Ubuntu' or
+ansible_facts['distribution'] == 'Debian'
 
-- name: Install basic packages to build QEMU on Ubuntu 20.04
+- name: Install basic packages to build QEMU on Ubuntu 20.04 or Debian11
   package:
 name:
   - ccache
@@ -56,7 +58,6 @@
   - libibverbs-dev
   - libiscsi-dev
   - libjemalloc-dev
-  - libjpeg-turbo8-dev
   - liblzo2-dev
   - libncurses5-dev
   - libncursesw5-dev
@@ -86,17 +87,37 @@
   - sparse
   - xfslibs-dev
 state: present
+  when:
+- |
+ansible_facts['distribution'] == 'Ubuntu' or
+ansible_facts['distribution'] == 'Debian'
+
+- name: Install Ubuntu exclusive packages to build QEMU
+  package:
+name:
+  - libjpeg-turbo8-dev
+state: present
   when:
 - ansible_facts['distribution'] == 'Ubuntu'
 
-- name: Install packages to build QEMU on Ubuntu 20.04 on non-s390x
+- name: Install Debian exclusive packages to build QEMU
+  package:
+name:
+  - libjpeg62-turbo-dev
+state: present
+  when:
+- ansible_facts['distribution'] == 'Debian'
+
+- name: Install packages to build QEMU on Ubuntu 20.04 or Debian11 on 
non-s390x
   package:
 name:
   - libspice-server-dev
   - libxen-dev
 state: present
   when:
-- ansible_facts['distribution'] == 'Ubuntu'
+- |
+ansible_facts['distribution'] == 'Ubuntu' or
+ansible_facts['distribution'] == 'Debian'
 - ansible_facts['architecture'] == 'aarch64' or 
ansible_facts['architecture'] == 'x86_64'
 
 - name: Install basic packages to build QEMU on Ubuntu 20.04
-- 
2.25.1




[PATCH 5/8] scripts/ci/setup: Add ppc64le to vars.yml template

2022-07-27 Thread Lucas Mateus Castro(alqotel)
Added ppc64le so that the gitlab-runner.yml could be used to set up
ppc64le runners.

Signed-off-by: Lucas Mateus Castro(alqotel) 
---
 scripts/ci/setup/vars.yml.template | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/ci/setup/vars.yml.template 
b/scripts/ci/setup/vars.yml.template
index e48089761f..2c84698b87 100644
--- a/scripts/ci/setup/vars.yml.template
+++ b/scripts/ci/setup/vars.yml.template
@@ -8,5 +8,6 @@ ansible_to_gitlab_arch:
   x86_64: amd64
   aarch64: arm64
   s390x: s390x
+  ppc64le: ppc64le
 # A unique token made available by GitLab to your project for registering 
runners
 gitlab_runner_registration_token: PLEASE_PROVIDE_A_VALID_TOKEN
-- 
2.25.1




[PATCH 1/8] tests/docker: Fix alpine dockerfile

2022-07-27 Thread Lucas Mateus Castro(alqotel)
Currently the run script uses 'readlink -e' but the image only has the
busybox readlink, this commit add the coreutils package which
contains the readlink with the '-e' option.

Signed-off-by: Lucas Mateus Castro(alqotel) 
---
 tests/docker/dockerfiles/alpine.docker | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/docker/dockerfiles/alpine.docker 
b/tests/docker/dockerfiles/alpine.docker
index 3f4c0f95cb..2943a99730 100644
--- a/tests/docker/dockerfiles/alpine.docker
+++ b/tests/docker/dockerfiles/alpine.docker
@@ -21,6 +21,7 @@ RUN apk update && \
 cdrkit \
 ceph-dev \
 clang \
+coreutils \
 ctags \
 curl-dev \
 cyrus-sasl-dev \
-- 
2.25.1




Re: [PATCH v3 05/21] block/vhost-user-blk-server: don't expose VHOST_USER_F_PROTOCOL_FEATURES

2022-07-27 Thread Kevin Wolf
Am 26.07.2022 um 21:21 hat Alex Bennée geschrieben:
> This bit is unused in actual VirtIO feature negotiation and should
> only appear in the vhost-user messages between master and slave.
> 
> [AJB: experiment, this doesn't break the tests but I'm not super
> confident of the range of tests]
> 
> Signed-off-by: Alex Bennée 

I guess the rationale is that this doesn't make a difference because
vu_get_features_exec(), which is the only caller of .get_features(),
adds the VHOST_USER_F_PROTOCOL_FEATURES bit back before sending a
response.

Can you please add this to the commit message?

>  block/export/vhost-user-blk-server.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/block/export/vhost-user-blk-server.c 
> b/block/export/vhost-user-blk-server.c
> index 3409d9e02e..d31436006d 100644
> --- a/block/export/vhost-user-blk-server.c
> +++ b/block/export/vhost-user-blk-server.c
> @@ -125,8 +125,7 @@ static uint64_t vu_blk_get_features(VuDev *dev)
> 1ull << VIRTIO_BLK_F_MQ |
> 1ull << VIRTIO_F_VERSION_1 |
> 1ull << VIRTIO_RING_F_INDIRECT_DESC |
> -   1ull << VIRTIO_RING_F_EVENT_IDX |
> -   1ull << VHOST_USER_F_PROTOCOL_FEATURES;
> +   1ull << VIRTIO_RING_F_EVENT_IDX ;

This has an extra space before the semicolon.

Kevin




[PATCH 4/8] scripts/ci/setup: spice-server only on x86 aarch64

2022-07-27 Thread Lucas Mateus Castro(alqotel)
Changed build-environment.yml to only install spice-server on x86_64 and
aarch64 as this package is only available on those architectures.

Signed-off-by: Lucas Mateus Castro(alqotel) 
---
 scripts/ci/setup/build-environment.yml | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/scripts/ci/setup/build-environment.yml 
b/scripts/ci/setup/build-environment.yml
index 7535228685..43cf8c759f 100644
--- a/scripts/ci/setup/build-environment.yml
+++ b/scripts/ci/setup/build-environment.yml
@@ -160,7 +160,6 @@
   - python36
   - rdma-core-devel
   - spice-glib-devel
-  - spice-server
   - systemtap-sdt-devel
   - tar
   - zlib-devel
@@ -168,3 +167,14 @@
   when:
 - ansible_facts['distribution_file_variety'] == 'RedHat'
 - ansible_facts['distribution_version'] == '8'
+
+- name: Install packages only available on x86 and aarch64
+  dnf:
+# Spice server not available in ppc64le
+name:
+  - spice-server
+state: present
+  when:
+- ansible_facts['distribution_file_variety'] == 'RedHat'
+- ansible_facts['distribution_version'] == '8'
+- ansible_facts['architecture'] == 'aarch64' or 
ansible_facts['architecture'] == 'x86_64'
-- 
2.25.1




[PATCH 0/8] Patch series to set up a ppc64le CI

2022-07-27 Thread Lucas Mateus Castro(alqotel)
This patch series aim to make easier to set up a compilation and CI
environment in PPC64 and PPC64LE machines.

The first 2 patches is a fix not related to ppc64.
Patch 3 and 4 also affect some other architectures.
Patches 5 to 7 are adding Power specific additions.

Patch 8 is a RFC for a current way to run the docker tests in PPC64LE.

Lucas Mateus Castro(alqotel) (8):
  tests/docker: Fix alpine dockerfile
  scripts/ci/setup: ninja missing from build-environment
  scripts/ci/setup: Fix libxen requirements
  scripts/ci/setup: spice-server only on x86 aarch64
  scripts/ci/setup: Add ppc64le to vars.yml template
  scripts/ci/setup: Add Fedora to build-environment.yml
  scripts/ci/setup: Added debian to build-environment.yml
  tests/docker: Selective line reading by python script

 scripts/ci/setup/build-environment.yml | 54 +-
 scripts/ci/setup/vars.yml.template |  1 +
 tests/docker/docker.py | 15 +--
 tests/docker/dockerfiles/alpine.docker |  3 ++
 4 files changed, 61 insertions(+), 12 deletions(-)

-- 
2.25.1




Re: virtio: why no full reset on virtio_set_status 0 ?

2022-07-27 Thread Michael S. Tsirkin
On Wed, Jul 27, 2022 at 12:51:31PM +0200, Claudio Fontana wrote:
> Hi Michael and all,
> 
> I have started researching a qemu / ovs / dpdk bug:
> 
> https://inbox.dpdk.org/dev/322122fb-619d-96f6-5c3e-9eabdbf38...@redhat.com/T/
> 
> that seems to be affecting multiple parties in the telco space,
> 
> and during this process I noticed that qemu/hw/virtio/virtio.c does not do a 
> full virtio reset
> in virtio_set_status, when receiving a status value of 0.
> 
> It seems it has always been this way, so I am clearly missing / forgetting 
> something basic,
> 
> I checked the virtio spec at https://docs.oasis-open.org/
> 
> and from:
> 
> "
> 4.1.4.3 Common configuration structure layout
> 
> device_status
> The driver writes the device status here (see 2.1). Writing 0 into this field 
> resets the device.
> 
> "
> 
> and
> 
> "
> 2.4.1 Device Requirements: Device Reset
> A device MUST reinitialize device status to 0 after receiving a reset.
> "
> 
> I would conclude that in virtio.c::virtio_set_status we should 
> unconditionally do a full virtio_reset.
> 
> Instead, we have just the check:
> 
> if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
> (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
> virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
> }
> 
> which just sets the started field,
> 
> and then we have the call to the virtio device class set_status 
> (virtio_net...),
> but the VirtioDevice is not fully reset, as per the virtio_reset() call we 
> are missing:
> 
> "
> vdev->start_on_kick = false;
> vdev->started = false;
> vdev->broken = false;
> vdev->guest_features = 0;
> vdev->queue_sel = 0;
> vdev->status = 0;
> vdev->disabled = false;
> qatomic_set(>isr, 0);
> vdev->config_vector = VIRTIO_NO_VECTOR;
> virtio_notify_vector(vdev, vdev->config_vector);
> 
> for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
> ... initialize vdev->vq[i] ...
> }
> "
> 
> Doing a full reset seems to fix the problem for me, so I can send tentative 
> patches if necessary,
> but what am I missing here?
> 
> Thanks,
> 
> Claudio

I went looking at this code and found a bug, so
I just sent a patch fixing it:
https://lore.kernel.org/r/20220727161445.21428-1-mst%40redhat.com

no idea whether it's related but can't hurt to try.


> -- 
> Claudio Fontana
> Engineering Manager Virtualization, SUSE Labs Core
> 
> SUSE Software Solutions Italy Srl




[PATCH] virtio-pci: don't touch pci on virtio reset

2022-07-27 Thread Michael S. Tsirkin
virtio level reset should not affect pci express
registers such as PM, error or link.

Fixes: 27ce0f3afc ("hw/virtio: fix Power Management Control Register for PCI 
Express virtio devices")
Fixes: d584f1b9ca ("hw/virtio: fix Link Control Register for PCI Express virtio 
devices")
Fixes: c2cabb3422 ("hw/virtio: fix error enabling flags in Device Control 
register")
Cc: "Marcel Apfelbaum" 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/virtio-pci.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 45327f0b31..3189ec014d 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1955,6 +1955,13 @@ static void virtio_pci_reset(DeviceState *qdev)
 proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
 proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0;
 }
+}
+
+static void virtio_pci_bus_reset(DeviceState *qdev)
+{
+PCIDevice *dev = PCI_DEVICE(qdev);
+
+virtio_pci_reset(qdev);
 
 if (pci_is_express(dev)) {
 pcie_cap_deverr_reset(dev);
@@ -2022,7 +2029,7 @@ static void virtio_pci_class_init(ObjectClass *klass, 
void *data)
 k->class_id = PCI_CLASS_OTHERS;
 device_class_set_parent_realize(dc, virtio_pci_dc_realize,
 >parent_dc_realize);
-dc->reset = virtio_pci_reset;
+dc->reset = virtio_pci_bus_reset;
 }
 
 static const TypeInfo virtio_pci_info = {
-- 
MST




Re: [PATCH v2 00/11] Refactor bdrv_try_set_aio_context using transactions

2022-07-27 Thread Vladimir Sementsov-Ogievskiy

On 7/25/22 15:21, Emanuele Giuseppe Esposito wrote:

The aim of this series is to reorganize bdrv_try_set_aio_context
and drop BDS ->set_aio_context and ->can_set_aio_ctx callbacks in
favour of a new one, ->change_aio_ctx.

More informations in patch 3 (which is also RFC, due to the doubts
I have with AioContext locks).

Patch 1 just add assertions in the code, 2 extends the transactions API to be 
able to add also transactions in the tail
of the list.
Patch 3 is the core of this series, and introduces the new callback.
It is marked as RFC and the reason is explained in the commit message.
Patches 4-5-6 implement ->change_aio_ctx in the various block, blockjob
and block-backend BDSes.
Patch 7 substitutes ->change_aio_ctx with the old callbacks, and
patch 8 takes care of deleting the old callbacks and unused code.

This series is based on "job: replace AioContext lock with job_mutex",
but just because it uses job_set_aio_context() introduced there.

Suggested-by: Paolo Bonzini
Based-on:<20220706201533.289775-1-eespo...@redhat.com>



What I dislike here is that you refactor aio-context-change to use transactions, but you 
use it "internally" for aio-context-change. aio-context-change doesn't become a 
native part of block-graph modifiction transaction system after the series.

For example, bdrv_attach_child_common(..., tran) still calls 
bdrv_try_change_aio_context() function which doesn't take @tran argument. And 
we have to call bdrv_try_change_aio_context() again in 
bdrv_attach_child_common_abort() with opposite arguments by hand. We create in 
_try_ separate Transaction object which is unrelated to the original 
block-graph-change transaction.

With good refactoring we should get rid of these _try_ functions, and have just 
bdrv_change_aio_context(..., tran) that can be natively used with external tran 
object, where other block-graph change actions participate. This way we'll not 
have to call reverse aio_context_change() in .abort, it will be done 
automatically.

Moreover, your  *aio_context_change* functions that do have tran parameter 
cannot be simply used in the block-graph change transaction, as you don't 
follow the common paradigm, that in .prepare we do all visible changes. That's 
why you have to still use _try_*() version that creates seaparate transaction 
object and completes it: after that the action is actually done and other 
graph-modifying actions can be done on top.

So, IMHO, we shouldn't go this way, as that adds transaction actions that 
actually can't be used in common block-graph-modifying transaction but only 
context of bdrv_try_change_aio_context() internal transaction.

I agree that aio-context change should finally be rewritten to take a native 
place in block-graph transactions, but that should be a complete solution, 
introducing native bdrv_change_aio_context(..., tran) transaction action that 
is directly used in the block-graph transaction, do visible effect in .prepare 
and don't create extra Transaction objects.

--
Best regards,
Vladimir



Re: [RFC v4 5/9] qemu-iotests: test new zone operations.

2022-07-27 Thread Ming Lei
On Wed, Jul 27, 2022 at 10:34:56AM -0400, Stefan Hajnoczi wrote:
> On Mon, 11 Jul 2022 at 22:21, Sam Li  wrote:
> >
> > We have added new block layer APIs of zoned block devices. Test it with:
> > (1) Create a null_blk device, run each zone operation on it and see
> > whether reporting right zone information.
> >
> > Signed-off-by: Sam Li 
> > ---
> >  tests/qemu-iotests/tests/zoned.sh | 69 +++
> >  1 file changed, 69 insertions(+)
> >  create mode 100755 tests/qemu-iotests/tests/zoned.sh
> >
> > diff --git a/tests/qemu-iotests/tests/zoned.sh 
> > b/tests/qemu-iotests/tests/zoned.sh
> > new file mode 100755
> > index 00..e14a3a420e
> > --- /dev/null
> > +++ b/tests/qemu-iotests/tests/zoned.sh
> > @@ -0,0 +1,69 @@
> > +#!/usr/bin/env bash
> > +#
> > +# Test zone management operations.
> > +#
> > +
> > +seq="$(basename $0)"
> > +echo "QA output created by $seq"
> > +status=1 # failure is the default!
> > +
> > +_cleanup()
> > +{
> > +  _cleanup_test_img
> > +  sudo rmmod null_blk
> > +}
> > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > +
> > +# get standard environment, filters and checks
> > +. ./common.rc
> > +. ./common.filter
> > +. ./common.qemu
> > +
> > +# This test only runs on Linux hosts with raw image files.
> > +_supported_fmt raw
> > +_supported_proto file
> > +_supported_os Linux
> > +
> > +QEMU_IO="build/qemu-io"
> > +IMG="--image-opts driver=zoned_host_device,filename=/dev/nullb0"
> > +QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT
> > +
> > +echo "Testing a null_blk device"
> > +echo "Simple cases: if the operations work"
> > +sudo modprobe null_blk nr_devices=1 zoned=1
> 
> Jens & Ming Lei:
> 
> null_blk is not ideal for automated test cases because it requires
> root and is global. If two tests are run at the same time they will
> interfere with each other. There is no way to have independent
> instances of the null_blk kernel module and the /dev/nullb0 device on
> a single Linux system. I think this test case can be merged for now
> but if there's time I suggest investigating alternatives.
> 
> Maybe the new Linux ublk_drv driver is a better choice if it supports
> unprivileged operation with multiple independent block devices (plus

This can be one big topic, and IMO, the io path needs to be reviewed
wrt. security risk. Also if one block device is stuck, it shouldn't
affect others, so far it can be one problem at least in sync/writeback,
if one device is hang, writeback on all block device may not move on.

> zoned storage!). It would be necessary to write a userspace null block
> device that supports zoned storage if that doesn't exist already. I
> have CCed Ming Lei and Jens Axboe for thoughts.

IMO, it shouldn't be hard to simulate zoned from userspace, the
patches[1] for setting device parameters are just sent out, then zoned
parameter could be sent to driver with the added commands easily.

Even ublk can be used to implement zoned target, such as, ublk is
exposed as one normal disk, but the backing disk could be one real
zoned/zns device.

[1] 
https://lore.kernel.org/linux-block/20220727141628.985429-1-ming@redhat.com/T/#mb5518cf418b63fb6ca649f0df199137e50907a29

thanks,
Ming




[PATCH v1 3/5] hw/virtio: gracefully handle unset vhost_dev vdev

2022-07-27 Thread Alex Bennée
I've noticed asserts firing because we query the status of vdev after
a vhost connection is closed down. Rather than faulting on the NULL
indirect just quietly reply false.

Signed-off-by: Alex Bennée 
Message-Id: <20220726192150.2435175-8-alex.ben...@linaro.org>
---
 hw/virtio/vhost.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 0827d631c0..f758f177bb 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -306,7 +306,7 @@ static inline void vhost_dev_log_resize(struct vhost_dev 
*dev, uint64_t size)
 dev->log_size = size;
 }
 
-static int vhost_dev_has_iommu(struct vhost_dev *dev)
+static bool vhost_dev_has_iommu(struct vhost_dev *dev)
 {
 VirtIODevice *vdev = dev->vdev;
 
@@ -316,8 +316,12 @@ static int vhost_dev_has_iommu(struct vhost_dev *dev)
  * does not have IOMMU, there's no need to enable this feature
  * which may cause unnecessary IOTLB miss/update transactions.
  */
-return virtio_bus_device_iommu_enabled(vdev) &&
-   virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+if (vdev) {
+return virtio_bus_device_iommu_enabled(vdev) &&
+virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+} else {
+return false;
+}
 }
 
 static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
-- 
2.30.2




[PATCH v1 2/5] hw/virtio: incorporate backend features in features

2022-07-27 Thread Alex Bennée
There are some extra bits used over a vhost-user connection which are
hidden from the device itself. We need to set them here to ensure we
enable things like the protocol extensions.

Currently net/vhost-user.c has it's own inscrutable way of persisting
this data but it really should live in the core vhost_user code.

Signed-off-by: Alex Bennée 
Message-Id: <20220726192150.2435175-7-alex.ben...@linaro.org>
---
 hw/virtio/vhost-user.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 75b8df21a4..1936a44e82 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -1460,7 +1460,14 @@ static int vhost_user_set_features(struct vhost_dev *dev,
  */
 bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
 
-return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features,
+/*
+ * We need to include any extra backend only feature bits that
+ * might be needed by our device. Currently this includes the
+ * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol
+ * features.
+ */
+return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
+  features | dev->backend_features,
   log_enabled);
 }
 
-- 
2.30.2




[PATCH v1 5/5] hw/virtio: fix vhost_user_read tracepoint

2022-07-27 Thread Alex Bennée
As reads happen in the callback we were never seeing them. We only
really care about the header so move the tracepoint to when the header
is complete.

Fixes: 6ca6d8ee9d (hw/virtio: add vhost_user_[read|write] trace points)
Signed-off-by: Alex Bennée 
Message-Id: <20220726192150.2435175-10-alex.ben...@linaro.org>
---
 hw/virtio/vhost-user.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 1936a44e82..c0b50deaf2 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -295,6 +295,8 @@ static int vhost_user_read_header(struct vhost_dev *dev, 
VhostUserMsg *msg)
 return -EPROTO;
 }
 
+trace_vhost_user_read(msg->hdr.request, msg->hdr.flags);
+
 return 0;
 }
 
@@ -544,8 +546,6 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, 
uint64_t base,
 }
 }
 
-trace_vhost_user_read(msg.hdr.request, msg.hdr.flags);
-
 return 0;
 }
 
-- 
2.30.2




[PATCH v1 4/5] hw/virtio: handle un-configured shutdown in virtio-pci

2022-07-27 Thread Alex Bennée
The assert() protecting against leakage is a little aggressive and
causes needless crashes if a device is shutdown without having been
configured. In this case no descriptors are lost because none have
been assigned.

Signed-off-by: Alex Bennée 
Message-Id: <20220726192150.2435175-9-alex.ben...@linaro.org>
---
 hw/virtio/virtio-pci.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 45327f0b31..5ce61f9b45 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -996,9 +996,14 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, 
int nvqs, bool assign)
 
 nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX);
 
-/* When deassigning, pass a consistent nvqs value
- * to avoid leaking notifiers.
+/*
+ * When deassigning, pass a consistent nvqs value to avoid leaking
+ * notifiers. But first check we've actually been configured, exit
+ * early if we haven't.
  */
+if (!assign && !proxy->nvqs_with_notifiers) {
+return 0;
+}
 assert(assign || nvqs == proxy->nvqs_with_notifiers);
 
 proxy->nvqs_with_notifiers = nvqs;
-- 
2.30.2




[PATCH v1 1/5] block/vhost-user-blk-server: don't expose VHOST_USER_F_PROTOCOL_FEATURES

2022-07-27 Thread Alex Bennée
This bit is unused in actual VirtIO feature negotiation and should
only appear in the vhost-user messages between master and slave.

[AJB: experiment, this doesn't break the tests but I'm not super
confident of the range of tests]

Signed-off-by: Alex Bennée 
Message-Id: <20220726192150.2435175-6-alex.ben...@linaro.org>
---
 block/export/vhost-user-blk-server.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
index 3409d9e02e..d31436006d 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -125,8 +125,7 @@ static uint64_t vu_blk_get_features(VuDev *dev)
1ull << VIRTIO_BLK_F_MQ |
1ull << VIRTIO_F_VERSION_1 |
1ull << VIRTIO_RING_F_INDIRECT_DESC |
-   1ull << VIRTIO_RING_F_EVENT_IDX |
-   1ull << VHOST_USER_F_PROTOCOL_FEATURES;
+   1ull << VIRTIO_RING_F_EVENT_IDX ;
 
 if (!vexp->handler.writable) {
 features |= 1ull << VIRTIO_BLK_F_RO;
-- 
2.30.2




[PATCH for 7.1 v1 0/5] virtio fixes (split from virtio-gpio series)

2022-07-27 Thread Alex Bennée
Hi,

This is just a split out series based on:

   Subject: [PATCH v3 for 7.2 00/21] virtio-gpio and various virtio cleanups
   Date: Tue, 26 Jul 2022 20:21:29 +0100
   Message-Id: <20220726192150.2435175-1-alex.ben...@linaro.org>

with the patches identified by mst:

  Right. Still I think some are fixes we should merge now.
  I am thinking patches 5, 7,8,9 ? 6 if it makes backporting
  much easier. WDYT? If you agree pls separate bugfixes in
  series I can apply. Thanks!

So here they are ;-)

Alex Bennée (5):
  block/vhost-user-blk-server: don't expose
VHOST_USER_F_PROTOCOL_FEATURES
  hw/virtio: incorporate backend features in features
  hw/virtio: gracefully handle unset vhost_dev vdev
  hw/virtio: handle un-configured shutdown in virtio-pci
  hw/virtio: fix vhost_user_read tracepoint

 block/export/vhost-user-blk-server.c |  3 +--
 hw/virtio/vhost-user.c   | 13 ++---
 hw/virtio/vhost.c| 10 +++---
 hw/virtio/virtio-pci.c   |  9 +++--
 4 files changed, 25 insertions(+), 10 deletions(-)

-- 
2.30.2




Re: [PATCH v10 18/21] job.c: enable job lock/unlock and remove Aiocontext locks

2022-07-27 Thread Vladimir Sementsov-Ogievskiy

On 7/25/22 10:38, Emanuele Giuseppe Esposito wrote:

Change the job_{lock/unlock} and macros to use job_mutex.

Now that they are not nop anymore, remove the aiocontext
to avoid deadlocks.

Therefore:
- when possible, remove completely the aiocontext lock/unlock pair
- if it is used by some other function too, reduce the locking
   section as much as possible, leaving the job API outside.
- change AIO_WAIT_WHILE in AIO_WAIT_WHILE_UNLOCKED, since we
   are not using the aiocontext lock anymore

There is only one JobDriver callback, ->free() that assumes that
the aiocontext lock is held (because it calls bdrv_unref), so for
now keep that under aiocontext lock.

Also remove real_job_{lock/unlock}, as they are replaced by the
public functions.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Stefan Hajnoczi 
---
  blockdev.c   | 74 +---
  include/qemu/job.h   | 22 -
  job-qmp.c| 46 +++--
  job.c| 84 ++--
  tests/unit/test-bdrv-drain.c |  4 +-
  tests/unit/test-block-iothread.c |  2 +-
  tests/unit/test-blockjob.c   | 15 ++
  7 files changed, 52 insertions(+), 195 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 5b79093155..2cd84d206c 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -155,12 +155,7 @@ void blockdev_mark_auto_del(BlockBackend *blk)
  for (job = block_job_next_locked(NULL); job;
   job = block_job_next_locked(job)) {
  if (block_job_has_bdrv(job, blk_bs(blk))) {
-AioContext *aio_context = job->job.aio_context;
-aio_context_acquire(aio_context);
-
  job_cancel_locked(>job, false);
-
-aio_context_release(aio_context);
  }
  }
  
@@ -1836,14 +1831,7 @@ static void drive_backup_abort(BlkActionState *common)

  DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
  
  if (state->job) {

-AioContext *aio_context;
-
-aio_context = bdrv_get_aio_context(state->bs);
-aio_context_acquire(aio_context);
-
  job_cancel_sync(>job->job, true);
-
-aio_context_release(aio_context);
  }
  }
  
@@ -1937,14 +1925,7 @@ static void blockdev_backup_abort(BlkActionState *common)

  BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, 
common);
  
  if (state->job) {

-AioContext *aio_context;
-
-aio_context = bdrv_get_aio_context(state->bs);
-aio_context_acquire(aio_context);
-
  job_cancel_sync(>job->job, true);
-
-aio_context_release(aio_context);
  }
  }
  
@@ -3306,19 +3287,14 @@ out:

  }
  
  /*

- * Get a block job using its ID and acquire its AioContext.
- * Called with job_mutex held.
+ * Get a block job using its ID. Called with job_mutex held.
   */
-static BlockJob *find_block_job_locked(const char *id,
-   AioContext **aio_context,
-   Error **errp)
+static BlockJob *find_block_job_locked(const char *id, Error **errp)
  {
  BlockJob *job;
  
  assert(id != NULL);
  
-*aio_context = NULL;

-
  job = block_job_get_locked(id);
  
  if (!job) {

@@ -3327,36 +3303,30 @@ static BlockJob *find_block_job_locked(const char *id,
  return NULL;
  }
  
-*aio_context = block_job_get_aio_context(job);

-aio_context_acquire(*aio_context);
-
  return job;
  }
  
  void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp)

  {
-AioContext *aio_context;
  BlockJob *job;
  
  JOB_LOCK_GUARD();

-job = find_block_job_locked(device, _context, errp);
+job = find_block_job_locked(device, errp);
  
  if (!job) {

  return;
  }
  
  block_job_set_speed_locked(job, speed, errp);

-aio_context_release(aio_context);
  }
  
  void qmp_block_job_cancel(const char *device,

bool has_force, bool force, Error **errp)
  {
-AioContext *aio_context;
  BlockJob *job;
  
  JOB_LOCK_GUARD();

-job = find_block_job_locked(device, _context, errp);
+job = find_block_job_locked(device, errp);
  
  if (!job) {

  return;
@@ -3369,22 +3339,19 @@ void qmp_block_job_cancel(const char *device,
  if (job_user_paused_locked(>job) && !force) {
  error_setg(errp, "The block job for device '%s' is currently paused",
 device);
-goto out;
+return;
  }
  
  trace_qmp_block_job_cancel(job);

  job_user_cancel_locked(>job, force, errp);
-out:
-aio_context_release(aio_context);
  }
  
  void qmp_block_job_pause(const char *device, Error **errp)

  {
-AioContext *aio_context;
  BlockJob *job;
  
  JOB_LOCK_GUARD();

-job = find_block_job_locked(device, _context, errp);
+job = find_block_job_locked(device, errp);
  
  if (!job) {

  return;
@@ 

Re: [PATCH v2 07/11] vfio/migration: Implement VFIO migration protocol v2

2022-07-27 Thread Avihai Horon



On 7/18/2022 6:12 PM, Jason Gunthorpe wrote:

On Mon, May 30, 2022 at 08:07:35PM +0300, Avihai Horon wrote:


+/* Returns 1 if end-of-stream is reached, 0 if more data and -1 if error */
+static int vfio_save_block(QEMUFile *f, VFIOMigration *migration)
+{
+ssize_t data_size;
+
+data_size = read(migration->data_fd, migration->data_buffer,
+ migration->data_buffer_size);
+if (data_size < 0) {
+return -1;
+}
+if (data_size == 0) {
+return 1;
+}
+
+qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
+qemu_put_be64(f, data_size);
+qemu_put_buffer_async(f, migration->data_buffer, data_size, false);
+qemu_fflush(f);
+bytes_transferred += data_size;
+
+trace_vfio_save_block(migration->vbasedev->name, data_size);
+
+return qemu_file_get_error(f);
+}

We looked at this from an eye to "how much data is transfered" per
callback.

The above function is the basic data mover, and
'migration->data_buffer_size' is set to 1MB at the moment.

So, we product up to 1MB VFIO_MIG_FLAG_DEV_DATA_STATE sections.

This series does not include the precopy support, but that will
include a precopy 'save_live_iterate' function like this:

static int vfio_save_iterate(QEMUFile *f, void *opaque)
{
 VFIODevice *vbasedev = opaque;
 VFIOMigration *migration = vbasedev->migration;
 int ret;

 ret = vfio_save_block(f, migration);
 if (ret < 0) {
 return ret;
 }
 if (ret == 1) {
 return 1;
 }
 qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
 return 0;
}

Thus, during precopy this will never do more than 1MB per callback.


+static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
+{
+VFIODevice *vbasedev = opaque;
+enum vfio_device_mig_state recover_state;
+int ret;
+
+/* We reach here with device state STOP or STOP_COPY only */
+recover_state = VFIO_DEVICE_STATE_STOP;
+ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
+   recover_state);
+if (ret) {
+return ret;
+}
+
+do {
+ret = vfio_save_block(f, vbasedev->migration);
+if (ret < 0) {
+return ret;
+}
+} while (!ret);

This seems to be the main problem where we chain together 1MB blocks
until the entire completed precopy data is completed. The above is
hooked to 'save_live_complete_precopy'

So, if we want to break the above up into some 'save_iterate' like
function, do you have some advice how to do it? The above do/while
must happen after the VFIO_DEVICE_STATE_STOP_COPY.


Ping.

Juan, AFAIU (and correct me if I am wrong) the problem on source side is 
that save_live_complete_precopy handlers are called with iothread 
locked, so during this time QEMU is non-responsive.
On destination side, we don't yield every now and then like RAM code 
does, so QEMU is non-responsive there as well.


Is it possible to solve this problem by letting the VFIO 
save_live_complete_precopy handler run outside the iothread lock?


For example, add a function to SaveVMHandlers that indicates whether 
this specific save_live_complete_precopy handler should run 
inside/outside iothread lock?
Or add a save_live_complete_precopy_nonblocking handler that runs 
outside the iothread lock?


On destination side, since VFIO data is sent in chunks of 1MB, we can 
yield every now and then.


What do you think?

Thanks.


For mlx5 the above loop will often be ~10MB's for small VMs and
100MB's for big VMs (big meaning making extensive use of RDMA
functionality), and this will not change with pre-copy support or not.

Is it still a problem?

For other devices, like a GPU, I would imagine pre-copy support is
implemented and this will be a smaller post-precopy residual.

Jason




Re: VIRTIO_NET_F_MTU not negotiated

2022-07-27 Thread Michael S. Tsirkin
On Wed, Jul 27, 2022 at 10:16:19AM +, Eli Cohen wrote:
> > -Original Message-
> > From: Michael S. Tsirkin 
> > Sent: Wednesday, July 27, 2022 12:35 PM
> > To: Eli Cohen 
> > Cc: Eugenio Perez Martin ; qemu-devel@nongnu.org; 
> > Jason Wang ;
> > virtualizat...@lists.linux-foundation.org
> > Subject: Re: VIRTIO_NET_F_MTU not negotiated
> > 
> > On Wed, Jul 27, 2022 at 09:04:47AM +, Eli Cohen wrote:
> > > > -Original Message-
> > > > From: Michael S. Tsirkin 
> > > > Sent: Wednesday, July 27, 2022 10:25 AM
> > > > To: Eli Cohen 
> > > > Cc: Eugenio Perez Martin ; qemu-devel@nongnu.org; 
> > > > Jason Wang ;
> > > > virtualizat...@lists.linux-foundation.org
> > > > Subject: Re: VIRTIO_NET_F_MTU not negotiated
> > > >
> > > > On Wed, Jul 27, 2022 at 06:51:56AM +, Eli Cohen wrote:
> > > > > I found out that the reason why I could not enforce the mtu stems 
> > > > > from the fact that I did not configure max mtu for the net
> > device
> > > > (e.g. through libvirt ).
> > > > > Libvirt does not allow this configuration for vdpa devices and 
> > > > > probably for a reason. The vdpa backend driver has the freedom
> > to do
> > > > it using its copy of virtio_net_config.
> > > > >
> > > > > The code in qemu that is responsible to allow to consider the device 
> > > > > MTU restriction is here:
> > > > >
> > > > > static void virtio_net_device_realize(DeviceState *dev, Error **errp)
> > > > > {
> > > > > VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > > VirtIONet *n = VIRTIO_NET(dev);
> > > > > NetClientState *nc;
> > > > > int i;
> > > > >
> > > > > if (n->net_conf.mtu) {
> > > > > n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
> > > > > }
> > > > >
> > > > > The above code can be interpreted as follows:
> > > > > if the command line arguments of qemu indicates that mtu should be 
> > > > > limited, then we would read this mtu limitation from the
> > > > device (that actual value is ignored).
> > > > >
> > > > > I worked around this limitation by unconditionally setting 
> > > > > VIRTIO_NET_F_MTU in the host features. As said, it only indicates
> > that
> > > > we should read the actual limitation for the device.
> > > > >
> > > > > If this makes sense I can send a patch to fix this.
> > > >
> > > > Well it will then either have to be for vdpa only, or have
> > > > compat machinery to avoid breaking migration.
> > > >
> > >
> > > How about this one:
> > >
> > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > > index 1067e72b3975..e464e4645c79 100644
> > > --- a/hw/net/virtio-net.c
> > > +++ b/hw/net/virtio-net.c
> > > @@ -3188,6 +3188,7 @@ static void 
> > > virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
> > >  static void virtio_net_set_config_size(VirtIONet *n, uint64_t 
> > > host_features)
> > >  {
> > >  virtio_add_feature(_features, VIRTIO_NET_F_MAC);
> > > +virtio_add_feature(_features, VIRTIO_NET_F_MTU);
> > >
> > >  n->config_size = virtio_feature_get_config_size(feature_sizes,
> > >  host_features);
> > 
> > Seems to increase config size unconditionally?
> 
> Right but you pay for reading two more bytes. Is that such a high price to 
> pay?


That's not a performance question. The issue compatibility, size
should not change for a given machine type.


> > 
> > > @@ -3512,6 +3513,7 @@ static void virtio_net_device_realize(DeviceState 
> > > *dev, Error **errp)
> > >
> > > if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) 
> > > {
> > >  struct virtio_net_config netcfg = {};
> > > +n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
> > >  memcpy(, >nic_conf.macaddr, ETH_ALEN);
> > >  vhost_net_set_config(get_vhost_net(nc->peer),
> > >  (uint8_t *), 0, ETH_ALEN, 
> > > VHOST_SET_CONFIG_TYPE_MASTER);
> > 
> > And the point is vdpa does not support migration anyway ATM, right?
> > 
> 
> I don't see how this can affect vdpa live migration. Am I missing something?

config size affects things like pci BAR size. This must not change
during migration.

-- 
MST




Re: virtio: why no full reset on virtio_set_status 0 ?

2022-07-27 Thread Michael S. Tsirkin
On Wed, Jul 27, 2022 at 12:51:31PM +0200, Claudio Fontana wrote:
> Hi Michael and all,
> 
> I have started researching a qemu / ovs / dpdk bug:
> 
> https://inbox.dpdk.org/dev/322122fb-619d-96f6-5c3e-9eabdbf38...@redhat.com/T/
> 
> that seems to be affecting multiple parties in the telco space,
> 
> and during this process I noticed that qemu/hw/virtio/virtio.c does not do a 
> full virtio reset
> in virtio_set_status, when receiving a status value of 0.
> 
> It seems it has always been this way, so I am clearly missing / forgetting 
> something basic,
> 
> I checked the virtio spec at https://docs.oasis-open.org/
> 
> and from:
> 
> "
> 4.1.4.3 Common configuration structure layout
> 
> device_status
> The driver writes the device status here (see 2.1). Writing 0 into this field 
> resets the device.
> 
> "
> 
> and
> 
> "
> 2.4.1 Device Requirements: Device Reset
> A device MUST reinitialize device status to 0 after receiving a reset.
> "
> 
> I would conclude that in virtio.c::virtio_set_status we should 
> unconditionally do a full virtio_reset.
> 
> Instead, we have just the check:
> 
> if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
> (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
> virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
> }
> 
> which just sets the started field,
> 
> and then we have the call to the virtio device class set_status 
> (virtio_net...),
> but the VirtioDevice is not fully reset, as per the virtio_reset() call we 
> are missing:
> 
> "
> vdev->start_on_kick = false;
> vdev->started = false;
> vdev->broken = false;
> vdev->guest_features = 0;
> vdev->queue_sel = 0;
> vdev->status = 0;
> vdev->disabled = false;
> qatomic_set(>isr, 0);
> vdev->config_vector = VIRTIO_NO_VECTOR;
> virtio_notify_vector(vdev, vdev->config_vector);
> 
> for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
> ... initialize vdev->vq[i] ...
> }
> "
> 
> Doing a full reset seems to fix the problem for me, so I can send tentative 
> patches if necessary,
> but what am I missing here?
> 
> Thanks,
> 
> Claudio
> 
> -- 
> Claudio Fontana
> Engineering Manager Virtualization, SUSE Labs Core
> 
> SUSE Software Solutions Italy Srl


So for example for pci:

case VIRTIO_PCI_STATUS:




if (vdev->status == 0) {
virtio_pci_reset(DEVICE(proxy));
}

which I suspect is a bug because:

static void virtio_pci_reset(DeviceState *qdev)
{
VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
VirtioBusState *bus = VIRTIO_BUS(>bus);
PCIDevice *dev = PCI_DEVICE(qdev);
int i;

virtio_bus_reset(bus);
msix_unuse_all_vectors(>pci_dev);

for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
proxy->vqs[i].enabled = 0;
proxy->vqs[i].num = 0;
proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0;
proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
proxy->vqs[i].used[0] = proxy->vqs[i].used[1] = 0;
}


so far so good

if (pci_is_express(dev)) {
pcie_cap_deverr_reset(dev);
pcie_cap_lnkctl_reset(dev);

pci_set_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL, 0);
}

this part is wrong I think, it got here by mistake since the same
function is used for bus level reset.

Jason, Marcel, any input?

-- 
MST




Re: [PATCH v10 17/21] blockjob: protect iostatus field in BlockJob struct

2022-07-27 Thread Vladimir Sementsov-Ogievskiy

On 7/25/22 10:38, Emanuele Giuseppe Esposito wrote:

iostatus is the only field (together with .job) that needs
protection using the job mutex.

It is set in the main loop (GLOBAL_STATE functions) but read
in I/O code (block_job_error_action).


Hmm, block_job_error_action doesn't read iostatus..

Also, iostatus is read by by mirror_run, which is not protected.



In order to protect it, change block_job_iostatus_set_err
to block_job_iostatus_set_err_locked(), always called under
job lock.

Signed-off-by: Emanuele Giuseppe Esposito 
---
  blockjob.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index 0663faee2c..448bdb5a53 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -363,7 +363,8 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
  return block_job_query_locked(job, errp);
  }
  
-static void block_job_iostatus_set_err(BlockJob *job, int error)

+/* Called with job lock held */
+static void block_job_iostatus_set_err_locked(BlockJob *job, int error)
  {
  if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
  job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
@@ -577,8 +578,8 @@ BlockErrorAction block_job_error_action(BlockJob *job, 
BlockdevOnError on_err,
   */
  job->job.user_paused = true;
  }
+block_job_iostatus_set_err_locked(job, error);
  }
-block_job_iostatus_set_err(job, error);
  }
  return action;
  }



--
Best regards,
Vladimir



Re: fu740 target

2022-07-27 Thread Ben Dooks

On 27/07/2022 15:38, Bin Meng wrote:

On Wed, Jul 27, 2022 at 10:24 PM Ben Dooks  wrote:


Is anyone working on adding a sifive-u74 core to the list of supported
CPU types? I was looking at full emulation of the Unmatched but at the
moment the best we have is sifive-u54 and I think that misses at least
two CSRs the sifive-u74 has.

Does anyone have plans to add the sifive-u74, and if not, would a plan
to add gradual support for it like adding CSRs 0x7c1 and 0x7c2 so we
can run an Unmatched U-boot SPL against it.


Adding 0x7c1/7c2 would be a vendor-specific CSR approach?


Part of the FU740 feature disable controls



If not, is there a definitive U54->U74 set of public differnces around
we could use to start from? I'd like to be able to run a full Unmatched
image using qemu at some point to add to the current real-board testing
we're doing.

(I have a basic addition of the type and the two CSRs as a couple of
patches if that would help as a start)



I am not aware of anyone doing U74 modeling in QEMU, but SiFive folks
(+Frank) may have one downstream as I see they posted several bug
fixes in the existing U54 model.

Regards,
Bin


Thanks

--
Ben Dooks   http://www.codethink.co.uk/
Senior Engineer Codethink - Providing Genius

https://www.codethink.co.uk/privacy.html



Re: [PATCH v10 14/21] jobs: protect job.aio_context with BQL and job_mutex

2022-07-27 Thread Vladimir Sementsov-Ogievskiy

On 7/25/22 10:38, Emanuele Giuseppe Esposito wrote:

In order to make it thread safe, implement a "fake rwlock",
where we allow reads under BQL *or* job_mutex held, but
writes only under BQL *and* job_mutex.

The only write we have is in child_job_set_aio_ctx, which always
happens under drain (so the job is paused).
For this reason, introduce job_set_aio_context and make sure that
the context is set under BQL, job_mutex and drain.


actually, we only make sure that pause was scheduled


Also make sure all other places where the aiocontext is read
are protected.

Note: at this stage, job_{lock/unlock} and job lock guard macros
are *nop*.

Suggested-by: Paolo Bonzini 
Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Stefan Hajnoczi 
---
  block/replication.c |  6 --
  blockjob.c  |  3 ++-
  include/qemu/job.h  | 19 ++-
  job.c   | 12 
  4 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/block/replication.c b/block/replication.c
index 55c8f894aa..2189863df1 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -148,8 +148,10 @@ static void replication_close(BlockDriverState *bs)
  }
  if (s->stage == BLOCK_REPLICATION_FAILOVER) {
  commit_job = >commit_job->job;
-assert(commit_job->aio_context == qemu_get_current_aio_context());
-job_cancel_sync(commit_job, false);
+WITH_JOB_LOCK_GUARD() {
+assert(commit_job->aio_context == qemu_get_current_aio_context());
+job_cancel_sync_locked(commit_job, false);
+}
  }
  
  if (s->mode == REPLICATION_MODE_SECONDARY) {

diff --git a/blockjob.c b/blockjob.c
index 96fb9d9f73..9ff2727025 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -162,12 +162,13 @@ static void child_job_set_aio_ctx(BdrvChild *c, 
AioContext *ctx,
  bdrv_set_aio_context_ignore(sibling->bs, ctx, ignore);
  }
  
-job->job.aio_context = ctx;

+job_set_aio_context(>job, ctx);
  }
  
  static AioContext *child_job_get_parent_aio_context(BdrvChild *c)

  {
  BlockJob *job = c->opaque;
+assert(qemu_in_main_thread());
  
  return job->job.aio_context;

  }
diff --git a/include/qemu/job.h b/include/qemu/job.h
index 5709e8d4a8..c144aabefc 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -77,7 +77,12 @@ typedef struct Job {
  
  /** Protected by AioContext lock */
  
-/** AioContext to run the job coroutine in */

+/**
+ * AioContext to run the job coroutine in.
+ * This field can be read when holding either the BQL (so we are in
+ * the main loop) or the job_mutex.
+ * It can be only written when we hold *both* BQL and job_mutex.
+ */
  AioContext *aio_context;
  
  /** Reference count of the block job */

@@ -741,4 +746,16 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error 
**errp),
  int job_finish_sync_locked(Job *job, void (*finish)(Job *, Error **errp),
 Error **errp);
  
+/**

+ * Sets the @job->aio_context.
+ * Called with job_mutex *not* held.
+ *
+ * This function must run in the main thread to protect against
+ * concurrent read in job_finish_sync_locked(),
+ * takes the job_mutex lock to protect against the read in
+ * job_do_yield_locked(), and must be called when the coroutine
+ * is quiescent.


What means "coroutine is quescent"? Could we just swap it by "job is paused"?


+ */
+void job_set_aio_context(Job *job, AioContext *ctx);
+
  #endif
diff --git a/job.c b/job.c
index ecec66b44e..0a857b1468 100644
--- a/job.c
+++ b/job.c
@@ -394,6 +394,17 @@ Job *job_get(const char *id)
  return job_get_locked(id);
  }
  
+void job_set_aio_context(Job *job, AioContext *ctx)

+{
+/* protect against read in job_finish_sync_locked and job_start */
+assert(qemu_in_main_thread());
+/* protect against read in job_do_yield_locked */
+JOB_LOCK_GUARD();
+/* ensure the coroutine is quiescent while the AioContext is changed */


pause_count means pause was scheduled. Job may be paused already, or may be 
not. I'm not against the assertion, it helps. I just think that we don't have 
the guarantee that comment claims. (And I still don't understand what means 
coroutine is quiescent. And not that there are may be several job related 
coroutines: the main one and some worker coroutines).


+assert(job->pause_count > 0);
+job->aio_context = ctx;
+}
+
  /* Called with job_mutex *not* held. */
  static void job_sleep_timer_cb(void *opaque)
  {
@@ -1376,6 +1387,7 @@ int job_finish_sync_locked(Job *job,
  {
  Error *local_err = NULL;
  int ret;
+assert(qemu_in_main_thread());
  
  job_ref_locked(job);
  


without "quiescent coroutine" concept:
Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir



Re: [RFC] hw/nvme: Use irqfd to send interrupts

2022-07-27 Thread Stefan Hajnoczi
On Wed, 27 Jul 2022 at 03:18, Klaus Jensen  wrote:
>
> On Jul 21 09:29, Stefan Hajnoczi wrote:
> > On Wed, Jul 20, 2022, 22:36 Jinhao Fan  wrote:
> >
> > > Hi Stefan,
> > >
> > > Thanks for the detailed explanation!
> > >
> > > at 6:21 PM, Stefan Hajnoczi  wrote:
> > >
> > > > Hi Jinhao,
> > > > Thanks for working on this!
> > > >
> > > > irqfd is not necessarily faster than KVM ioctl interrupt injection.
> > > >
> > > > There are at least two non performance reasons for irqfd:
> > > > 1. It avoids QEMU emulation code, which historically was not thread safe
> > > and needed the Big QEMU Lock. IOThreads don't hold the BQL and therefore
> > > cannot safely call the regular interrupt emulation code in QEMU. I think
> > > this is still true today although parts of the code may now be less 
> > > reliant
> > > on the BQL.
> > >
> > > This probably means we need to move to irqfd when iothread support is 
> > > added
> > > in qemu-nvme.
> > >
> >
> > Yes. You can audit the interrupt code but I'm pretty sure there is shared
> > state that needs to be protected by the BQL. So the NVMe emulation code
> > probably needs to use irqfd to avoid the interrupt emulation code.
> >
> >
> > > > 2. The eventfd interface decouples interrupt injection from the KVM
> > > ioctl interface. Vhost kernel and vhost-user device emulation code has no
> > > dependency on KVM thanks to irqfd. They work with any eventfd, including
> > > irqfd.
> > >
> > > This is contrary to our original belief. Klaus once pointed out that irqfd
> > > is KVM specific. I agreed with him since I found irqfd implementation is 
> > > in
> > > virt/kvm/eventfd.c. But irqfd indeed avoids the KVM ioctl call. Could you
> > > elaborate on what “no dependency on KVM” means?
> > >
> >
> > "They work with any eventfd, including irqfd"
> >
> > If you look at the vhost kernel or vhost-user code, you'll see they just
> > signal the eventfd. It doesn't have to be an irqfd.
> >
> > An irqfd is a specific type of eventfd that the KVM kernel module
> > implements to inject interrupts when the eventfd is signaled.
> >
> > By the way, this not only decouples vhost from the KVM kernel module, but
> > also allows QEMU to emulate MSI-X masking via buffering the interrupt in
> > userspace.
> >
> >
>
> The virtio dataplane (iothread support) only works with kvm if I am not
> mistaken, so I guess this is similar to what we want to do here. If we
> dont have KVM, we wont use iothread and we wont use the kvm
> irqchip/irqfd.
>
> Am I understanding this correctly?

I think that is incorrect. QEMU has guest notifier emulation for the
non-KVM (and non-MSI-X PCI) cases. When there is no irqfd support
available, QEMU sets up a regular eventfd and calls
virtio_queue_guest_notifier_read() when it becomes readable.

Stefan



Re: [RFC v4 0/9] Add support for zoned device

2022-07-27 Thread Sam Li
Stefan Hajnoczi  于2022年7月27日周三 23:06写道:
>
> This patch series introduces the concept of zoned storage to the QEMU
> block layer. Documentation is needed so that users and developers know
> how to use and maintain this feature.
>
> As a minimum, let's document how to pass through zoned block devices on Linux:
>
> diff --git a/docs/system/qemu-block-drivers.rst.inc
> b/docs/system/qemu-block-drivers.rst.inc
> index dfe5d2293d..f6ba05710a 100644
> --- a/docs/system/qemu-block-drivers.rst.inc
> +++ b/docs/system/qemu-block-drivers.rst.inc
> @@ -430,6 +430,12 @@ Hard disks
>you may corrupt your host data (use the ``-snapshot`` command
>line option or modify the device permissions accordingly).
>
> +Zoned block devices
> +  Zoned block devices can be passed through to the guest if the emulated
> +  storage controller supports zoned storage. Use ``--blockdev
> +  zoned_host_device,node-name=drive0,filename=/dev/nullb0`` to pass through
> +  ``/dev/nullb0`` as ``drive0``.
> +
>  Windows
>  ^^^
>
> For developers there should be an explanation of the zoned storage
> APIs and how BlockDrivers declare support. It should also mention the
> status of pass through (implemented in the zoned_host_device driver)
> vs zone emulation (not implemented in the QEMU block layer) so
> developers understand the block layer's zoned storage capabilities.
> You can add a docs/devel/zoned-storage.rst file to document this or
> let me know if you want me to write it.

I will write the document and address the issues in the reviews, which
should be in the next revision.
Thanks for reviewing!

Have a good day!
Sam



Re: [RFC v4 5/9] qemu-iotests: test new zone operations.

2022-07-27 Thread Stefan Hajnoczi
On Wed, 27 Jul 2022 at 11:00, Ming Lei  wrote:
>
> On Wed, Jul 27, 2022 at 10:34:56AM -0400, Stefan Hajnoczi wrote:
> > On Mon, 11 Jul 2022 at 22:21, Sam Li  wrote:
> > >
> > > We have added new block layer APIs of zoned block devices. Test it with:
> > > (1) Create a null_blk device, run each zone operation on it and see
> > > whether reporting right zone information.
> > >
> > > Signed-off-by: Sam Li 
> > > ---
> > >  tests/qemu-iotests/tests/zoned.sh | 69 +++
> > >  1 file changed, 69 insertions(+)
> > >  create mode 100755 tests/qemu-iotests/tests/zoned.sh
> > >
> > > diff --git a/tests/qemu-iotests/tests/zoned.sh 
> > > b/tests/qemu-iotests/tests/zoned.sh
> > > new file mode 100755
> > > index 00..e14a3a420e
> > > --- /dev/null
> > > +++ b/tests/qemu-iotests/tests/zoned.sh
> > > @@ -0,0 +1,69 @@
> > > +#!/usr/bin/env bash
> > > +#
> > > +# Test zone management operations.
> > > +#
> > > +
> > > +seq="$(basename $0)"
> > > +echo "QA output created by $seq"
> > > +status=1 # failure is the default!
> > > +
> > > +_cleanup()
> > > +{
> > > +  _cleanup_test_img
> > > +  sudo rmmod null_blk
> > > +}
> > > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > > +
> > > +# get standard environment, filters and checks
> > > +. ./common.rc
> > > +. ./common.filter
> > > +. ./common.qemu
> > > +
> > > +# This test only runs on Linux hosts with raw image files.
> > > +_supported_fmt raw
> > > +_supported_proto file
> > > +_supported_os Linux
> > > +
> > > +QEMU_IO="build/qemu-io"
> > > +IMG="--image-opts driver=zoned_host_device,filename=/dev/nullb0"
> > > +QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT
> > > +
> > > +echo "Testing a null_blk device"
> > > +echo "Simple cases: if the operations work"
> > > +sudo modprobe null_blk nr_devices=1 zoned=1
> >
> > Jens & Ming Lei:
> >
> > null_blk is not ideal for automated test cases because it requires
> > root and is global. If two tests are run at the same time they will
> > interfere with each other. There is no way to have independent
> > instances of the null_blk kernel module and the /dev/nullb0 device on
> > a single Linux system. I think this test case can be merged for now
> > but if there's time I suggest investigating alternatives.
> >
> > Maybe the new Linux ublk_drv driver is a better choice if it supports
> > unprivileged operation with multiple independent block devices (plus
>
> This can be one big topic, and IMO, the io path needs to be reviewed
> wrt. security risk. Also if one block device is stuck, it shouldn't
> affect others, so far it can be one problem at least in sync/writeback,
> if one device is hang, writeback on all block device may not move on.
>
> > zoned storage!). It would be necessary to write a userspace null block
> > device that supports zoned storage if that doesn't exist already. I
> > have CCed Ming Lei and Jens Axboe for thoughts.
>
> IMO, it shouldn't be hard to simulate zoned from userspace, the
> patches[1] for setting device parameters are just sent out, then zoned
> parameter could be sent to driver with the added commands easily.
>
> Even ublk can be used to implement zoned target, such as, ublk is
> exposed as one normal disk, but the backing disk could be one real
> zoned/zns device.
>
> [1] 
> https://lore.kernel.org/linux-block/20220727141628.985429-1-ming@redhat.com/T/#mb5518cf418b63fb6ca649f0df199137e50907a29

Thanks for replying!

For QEMU testing purposes a null block driver with zone emulation is
ideal. It does not need to persist data or zone metadata. It shouldn't
require a backing device so that testing can be performed even without
real zoned storage devices.

It should also be possible to extend Linux null_blk to make it more
friendly for parallel tests that are run without knowledge of each
other/cooperation. But I was thinking ublk may have that
infrastructure already.

Stefan



Re: [PATCH v4] hw/nvme: Use ioeventfd to handle doorbell updates

2022-07-27 Thread Jinhao Fan
After digging through the source code, I found event_notifier_cleanup() only
closes the eventfd, but does not de-register the event from QEMU’s event
loop. event_notifier_set_handler() actually interacts with the event loop.
It is a wrapper around aio_set_event_notifier(), which is again a wrapper of
aio_set_fd_handler(). Passing in a handler of NULL means removing the fd
handler from the event loop.

Therefore, originally in nvme_free_sq/cq(), we closed the eventfd but did
not delete its handler. I guess maybe sometime later the fd is reused and
that corresponding file is somehow written (A POLLIN event), this will
trigger the event loop to call the stale handler, which causes this bug.

So the correct order is:

Init:
1. event_notifier_init: Open the eventfd
2. event_notifier_set_handler: Register on the event loop
3. memory_region_add_eventfd: Associate with IO region

Cleanup:
1. memory_region_del_eventfd: De-associate with IO region
2. event_notifier_set_handler(NULL): Remove from the event loop
3. event_notifier_cleanup: Close the eventfd




Re: [RFC v4 0/9] Add support for zoned device

2022-07-27 Thread Stefan Hajnoczi
This patch series introduces the concept of zoned storage to the QEMU
block layer. Documentation is needed so that users and developers know
how to use and maintain this feature.

As a minimum, let's document how to pass through zoned block devices on Linux:

diff --git a/docs/system/qemu-block-drivers.rst.inc
b/docs/system/qemu-block-drivers.rst.inc
index dfe5d2293d..f6ba05710a 100644
--- a/docs/system/qemu-block-drivers.rst.inc
+++ b/docs/system/qemu-block-drivers.rst.inc
@@ -430,6 +430,12 @@ Hard disks
   you may corrupt your host data (use the ``-snapshot`` command
   line option or modify the device permissions accordingly).

+Zoned block devices
+  Zoned block devices can be passed through to the guest if the emulated
+  storage controller supports zoned storage. Use ``--blockdev
+  zoned_host_device,node-name=drive0,filename=/dev/nullb0`` to pass through
+  ``/dev/nullb0`` as ``drive0``.
+
 Windows
 ^^^

For developers there should be an explanation of the zoned storage
APIs and how BlockDrivers declare support. It should also mention the
status of pass through (implemented in the zoned_host_device driver)
vs zone emulation (not implemented in the QEMU block layer) so
developers understand the block layer's zoned storage capabilities.
You can add a docs/devel/zoned-storage.rst file to document this or
let me know if you want me to write it.

Stefan



Re: [PATCH] target/riscv: Ensure opcode is saved for every instruction

2022-07-27 Thread Richard Henderson

On 7/26/22 21:06, Anup Patel wrote:

I see that decode_save_opc() only saves opcode in an array
through tcg_set_insn_start_param(). Which brings me to the
question about how much are we saving by distributing
decode_save_opc() calls ?


It's not about tcg_set_insn_start_param(), but later when it is stored into the 
TranslationBlock -- see encode_search() in accel/tcg/translate-all.c.



If we distribute decode_save_opc() calls then the code becomes
fragile for future changes and we will miss adding decode_save_opc()
for some new extensions.


Perhaps the several percentage points of data savings are not significant enough to worry 
about.



r~



Re: fu740 target

2022-07-27 Thread Bin Meng
On Wed, Jul 27, 2022 at 10:44 PM Ben Dooks  wrote:
>
> On 27/07/2022 15:38, Bin Meng wrote:
> > On Wed, Jul 27, 2022 at 10:24 PM Ben Dooks  wrote:
> >>
> >> Is anyone working on adding a sifive-u74 core to the list of supported
> >> CPU types? I was looking at full emulation of the Unmatched but at the
> >> moment the best we have is sifive-u54 and I think that misses at least
> >> two CSRs the sifive-u74 has.
> >>
> >> Does anyone have plans to add the sifive-u74, and if not, would a plan
> >> to add gradual support for it like adding CSRs 0x7c1 and 0x7c2 so we
> >> can run an Unmatched U-boot SPL against it.
> >
> > Adding 0x7c1/7c2 would be a vendor-specific CSR approach?
>
> Part of the FU740 feature disable controls

Yep I know that. I was asking if you use a vendor-specific CSR
approach in QEMU to handle such cleanly.

>
> >>
> >> If not, is there a definitive U54->U74 set of public differnces around
> >> we could use to start from? I'd like to be able to run a full Unmatched
> >> image using qemu at some point to add to the current real-board testing
> >> we're doing.
> >>
> >> (I have a basic addition of the type and the two CSRs as a couple of
> >> patches if that would help as a start)
> >>
> >
> > I am not aware of anyone doing U74 modeling in QEMU, but SiFive folks
> > (+Frank) may have one downstream as I see they posted several bug
> > fixes in the existing U54 model.
> >

Regards,
Bin



Re: [RFC v4 9/9] qapi: add support for zoned host device

2022-07-27 Thread Stefan Hajnoczi
On Mon, 11 Jul 2022 at 22:31, Sam Li  wrote:
>
> ---
>  block/file-posix.c   | 8 +++-
>  qapi/block-core.json | 7 +--
>  2 files changed, 12 insertions(+), 3 deletions(-)

Please squash this into the patch that adds zoned_host_device.

Stefan



Re: [RFC v4 0/9] Add support for zoned device

2022-07-27 Thread Stefan Hajnoczi
On Mon, 11 Jul 2022 at 22:17, Sam Li  wrote:
>
> This patch series adds support for zoned device to virtio-blk emulation. Zoned
> Storage can support sequential writes, which reduces write amplification in 
> SSD,
> leading to higher write throughput and increased capacity.

Please use "git rebase -i master" and add "x make" lines after each
commit to check that the code still builds after each commit. It is
important to order patches so that each commit still builds
successfully. That way git-bisect(1) works and patches can be
backported/cherry-picked.

Stefan



Re: [RFC v4 6/9] raw-format: add zone operations

2022-07-27 Thread Stefan Hajnoczi
On Mon, 11 Jul 2022 at 22:21, Sam Li  wrote:
>
> Signed-off-by: Sam Li 
> ---
>  block/raw-format.c | 13 +
>  1 file changed, 13 insertions(+)
>
> diff --git a/block/raw-format.c b/block/raw-format.c
> index 69fd650eaf..96bdb6c1e2 100644
> --- a/block/raw-format.c
> +++ b/block/raw-format.c
> @@ -314,6 +314,17 @@ static int coroutine_fn raw_co_pdiscard(BlockDriverState 
> *bs,
>  return bdrv_co_pdiscard(bs->file, offset, bytes);
>  }
>
> +static int coroutine_fn raw_co_zone_report(BlockDriverState *bs, int64_t 
> offset,
> +   int64_t *nr_zones,
> +   BlockZoneDescriptor *zones) {
> +return bdrv_co_zone_report(bs->file->bs, offset, nr_zones, zones);
> +}
> +
> +static int coroutine_fn raw_co_zone_mgmt(BlockDriverState *bs, zone_op op,
> + int64_t offset, int64_t len) {
> +return bdrv_co_zone_mgmt(bs->file->bs, op, offset, len);
> +}
> +

Kevin, Markus, or Hanna: bdrv_*() APIs take a mix of BlockDriverState
*bs and BdrvChild *child arguments. Should these new APIs take bs or
child?

Reviewed-by: Stefan Hajnoczi 



Re: [RFC v4 7/9] config: add check to block layer

2022-07-27 Thread Stefan Hajnoczi
On Mon, 11 Jul 2022 at 22:22, Sam Li  wrote:
>
> Putting zoned/non-zoned BlockDrivers on top of each other is not
> allowed.
>
> Signed-off-by: Sam Li 
> ---
>  block.c  | 7 +++
>  block/file-posix.c   | 2 ++
>  include/block/block_int-common.h | 5 +
>  3 files changed, 14 insertions(+)
>
> diff --git a/block.c b/block.c
> index 2c0080..0e24582c7d 100644
> --- a/block.c
> +++ b/block.c
> @@ -7945,6 +7945,13 @@ void bdrv_add_child(BlockDriverState *parent_bs, 
> BlockDriverState *child_bs,
>  return;
>  }
>
> +if (parent_bs->drv->is_zoned != child_bs->drv->is_zoned) {
> +error_setg(errp, "Cannot add a %s child to a %s parent",
> +   child_bs->drv->is_zoned ? "zoned" : "non-zoned",
> +   parent_bs->drv->is_zoned ? "zoned" : "non-zoned");
> +return;
> +}

Please explain the rationale:

/*
 * Non-zoned block drivers do not follow zoned storage constraints
(i.e. sequential writes
 * to zones). Refuse mixing zoned and non-zoned drivers in a graph.
 */

> +
>  if (!QLIST_EMPTY(_bs->parents)) {
>  error_setg(errp, "The node %s already has a parent",
> child_bs->node_name);
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 42708012ff..e9ad1d8e1e 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -3924,6 +3924,7 @@ static BlockDriver bdrv_host_device = {
>  .format_name= "host_device",
>  .protocol_name= "host_device",
>  .instance_size  = sizeof(BDRVRawState),
> +.is_zoned = false,

In C static struct fields are automatically initialized to 0/false.
This line can be omitted.

>  .bdrv_needs_filename = true,
>  .bdrv_probe_device  = hdev_probe_device,
>  .bdrv_parse_filename = hdev_parse_filename,
> @@ -3971,6 +3972,7 @@ static BlockDriver bdrv_zoned_host_device = {
>  .format_name = "zoned_host_device",
>  .protocol_name = "zoned_host_device",
>  .instance_size = sizeof(BDRVRawState),
> +.is_zoned = true,
>  .bdrv_needs_filename = true,
>  .bdrv_probe_device  = hdev_probe_device,
>  .bdrv_parse_filename = hdev_parse_filename,
> diff --git a/include/block/block_int-common.h 
> b/include/block/block_int-common.h
> index 6037871089..29f1ec9184 100644
> --- a/include/block/block_int-common.h
> +++ b/include/block/block_int-common.h
> @@ -141,6 +141,11 @@ struct BlockDriver {
>   */
>  bool is_format;
>
> +/*
> + * Set to true if the BlockDriver is a zoned block driver.
> + */
> +bool is_zoned;

This isn't powerful enough to express the constraints.
block/raw-format.c supports both non-zoned and zoned children (after
your patch) but it won't pass the check.

Perhaps add bool supports_zoned_children and change the check to:

if (child_bs->drv->is_zoned &&
!parent_bs->drv->supports_zoned_children) { ...refuse... }

Then raw-format would work on top of zoned_host_device as well as
still working on non-zoned BDSes.

Stefan



Re: [RFC v4 8/9] include: add support for zoned block devices

2022-07-27 Thread Stefan Hajnoczi
On Mon, 11 Jul 2022 at 22:21, Sam Li  wrote:
>
> This is the virtio_blk.h header file from Dmitry's "virtio-blk: add
> support for zoned block devices" patch. It introduces
> virtio_blk_zoned_characteristics struct from Dmitry's virtio-blk zoned
> storage spec.
>
> Signed-off-by: Dmitry Fomichev 
> Signed-off-by: Sam Li 
> ---
>  include/standard-headers/linux/virtio_blk.h | 157 ++--
>  1 file changed, 141 insertions(+), 16 deletions(-)
>
> diff --git a/include/standard-headers/linux/virtio_blk.h 
> b/include/standard-headers/linux/virtio_blk.h
> index 2dcc90826a..f07fbe1b9b 100644
> --- a/include/standard-headers/linux/virtio_blk.h
> +++ b/include/standard-headers/linux/virtio_blk.h
> @@ -25,10 +25,10 @@
>   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
>   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>   * SUCH DAMAGE. */
> -#include "standard-headers/linux/types.h"
> -#include "standard-headers/linux/virtio_ids.h"
> -#include "standard-headers/linux/virtio_config.h"
> -#include "standard-headers/linux/virtio_types.h"
> +#include 
> +#include 
> +#include 
> +#include 

This file can't be copied from Linux verbatim. It needs to be
converted using scripts/update-linux-headers.sh.

Stefan



  1   2   3   >