date:20210908

Re: [PATCH RFC v2 08/16] vfio-user: get region info

2021-09-08 Thread Stefan Hajnoczi

On Thu, Sep 09, 2021 at 05:35:40AM +, John Johnson wrote:
> 
> 
> > On Sep 7, 2021, at 7:31 AM, Stefan Hajnoczi  wrote:
> > 
> > On Mon, Aug 16, 2021 at 09:42:41AM -0700, Elena Ufimtseva wrote:
> >> @@ -1514,6 +1515,16 @@ bool vfio_get_info_dma_avail(struct 
> >> vfio_iommu_type1_info *info,
> >> return true;
> >> }
> >> 
> >> +static int vfio_get_region_info_remfd(VFIODevice *vbasedev, int index)
> >> +{
> >> +struct vfio_region_info *info;
> >> +
> >> +if (vbasedev->regions == NULL || vbasedev->regions[index] == NULL) {
> >> +vfio_get_region_info(vbasedev, index, );
> >> +}
> > 
> > Maybe this will be called from other places in the future, but the
> > vfio_region_setup() caller below already invoked vfio_get_region_info()
> > so I'm not sure it's necessary to do this again?
> > 
> > Perhaps add an int *remfd argument to vfio_get_region_info(). That way
> > vfio_get_region_info_remfd() isn't necessary.
> > 
> 
>   I think they could be combined, but the region capabilities are
> retrieved with separate calls to vfio_get_region_info_cap(), so I followed
> that precedent.
> 
> 
> >> @@ -2410,6 +2442,24 @@ int vfio_get_region_info(VFIODevice *vbasedev, int 
> >> index,
> >>  struct vfio_region_info **info)
> >> {
> >> size_t argsz = sizeof(struct vfio_region_info);
> >> +int fd = -1;
> >> +int ret;
> >> +
> >> +/* create region cache */
> >> +if (vbasedev->regions == NULL) {
> >> +vbasedev->regions = g_new0(struct vfio_region_info *,
> >> +   vbasedev->num_regions);
> >> +if (vbasedev->proxy != NULL) {
> >> +vbasedev->regfds = g_new0(int, vbasedev->num_regions);
> >> +}
> >> +}
> >> +/* check cache */
> >> +if (vbasedev->regions[index] != NULL) {
> >> +*info = g_malloc0(vbasedev->regions[index]->argsz);
> >> +memcpy(*info, vbasedev->regions[index],
> >> +   vbasedev->regions[index]->argsz);
> >> +return 0;
> >> +}
> > 
> > Why is it necessary to introduce a cache? Is it to avoid passing the
> > same fd multiple times?
> > 
> 
>   Yes.  For polled regions, the server returns an FD with each
> message, so there would be an FD leak if I didn’t check that the region
> hadn’t already returned one.  Since I had to cache the FD anyway, I
> cached the whole struct.

If vfio_get_region_info() takes an int *fd argument then fd ownership
becomes explicit and the need for the cache falls away. Maybe Alex has a
preference for how to structure the code to track per-region fds.

Stefan


signature.asc
Description: PGP signature

Re: [PATCH v10 3/7] [RISCV_PM] Support CSRs required for RISC-V PM extension except for the h-mode

2021-09-08 Thread Alistair Francis

On Mon, Aug 30, 2021 at 3:54 AM Alexey Baturo  wrote:
>
> Signed-off-by: Alexey Baturo 
> ---
>  target/riscv/cpu.c |   6 +
>  target/riscv/cpu.h |  11 ++
>  target/riscv/csr.c | 276 +
>  3 files changed, 293 insertions(+)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 991a6bb760..4178eecbec 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -553,6 +553,12 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
> **errp)
>  }
>  set_vext_version(env, vext_version);
>  }
> +if (cpu->cfg.ext_j) {
> +#ifndef CONFIG_USER_ONLY
> +/* mmte is supposed to have pm.current hardwired to 1 */
> +env->mmte |= (PM_EXT_INITIAL | MMTE_M_PM_CURRENT);

This should probably be in the reset function instead.

> +#endif
> +}
>
>  set_misa(env, target_misa);
>  }
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 451a1637a1..94e680cbd0 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -238,6 +238,17 @@ struct CPURISCVState {
>
>  /* True if in debugger mode.  */
>  bool debugger;
> +
> +/*
> + * CSRs for PointerMasking extension
> + */
> +target_ulong mmte;
> +target_ulong mpmmask;
> +target_ulong mpmbase;
> +target_ulong spmmask;
> +target_ulong spmbase;
> +target_ulong upmmask;
> +target_ulong upmbase;
>  #endif
>
>  float_status fp_status;
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 9a4ed18ac5..42a0867e5d 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -192,6 +192,16 @@ static RISCVException hmode32(CPURISCVState *env, int 
> csrno)
>
>  }
>
> +/* Checks if PointerMasking registers could be accessed */
> +static RISCVException pointer_masking(CPURISCVState *env, int csrno)
> +{
> +/* Check if j-ext is present */
> +if (riscv_has_ext(env, RVJ)) {
> +return RISCV_EXCP_NONE;
> +}
> +return RISCV_EXCP_ILLEGAL_INST;
> +}
> +
>  static RISCVException pmp(CPURISCVState *env, int csrno)
>  {
>  if (riscv_feature(env, RISCV_FEATURE_PMP)) {
> @@ -1404,6 +1414,259 @@ static RISCVException write_pmpaddr(CPURISCVState 
> *env, int csrno,
>  return RISCV_EXCP_NONE;
>  }
>
> +/*
> + * Functions to access Pointer Masking feature registers
> + * We have to check if current priv lvl could modify
> + * csr in given mode
> + */
> +static bool check_pm_current_disabled(CPURISCVState *env, int csrno)
> +{
> +int csr_priv = get_field(csrno, 0x300);

Can you add a newline between declarations and code?

> +/*
> + * If priv lvls differ that means we're accessing csr from higher priv 
> lvl,
> + * so allow the access
> + */
> +if (env->priv != csr_priv) {
> +return false;
> +}
> +int cur_bit_pos;

Can you keep all of the declarations at the start of blocks please.

> +switch (env->priv) {
> +case PRV_M:
> +cur_bit_pos = M_PM_CURRENT;
> +break;
> +case PRV_S:
> +cur_bit_pos = S_PM_CURRENT;
> +break;
> +case PRV_U:
> +cur_bit_pos = U_PM_CURRENT;
> +break;
> +default:
> +g_assert_not_reached();
> +}
> +int pm_current = get_field(env->mmte, cur_bit_pos);
> +/* It's same priv lvl, so we allow to modify csr only if pm.current==1 */
> +return !pm_current;
> +}
> +
> +static RISCVException read_mmte(CPURISCVState *env, int csrno,
> +target_ulong *val)
> +{
> +*val = env->mmte & MMTE_MASK;
> +return RISCV_EXCP_NONE;
> +}
> +
> +static RISCVException write_mmte(CPURISCVState *env, int csrno,
> + target_ulong val)
> +{
> +uint64_t mstatus;
> +target_ulong wpri_val = val & MMTE_MASK;
> +if (val != wpri_val) {
> +qemu_log_mask(LOG_GUEST_ERROR,
> +  "MMTE: WPRI violation written 0x%lx vs expected 
> 0x%lx\n",
> +  val, wpri_val);
> +}
> +/* for machine mode pm.current is hardwired to 1 */
> +wpri_val |= MMTE_M_PM_CURRENT;
> +
> +/* hardwiring pm.instruction bit to 0, since it's not supported yet */
> +wpri_val &= ~(MMTE_M_PM_INSN | MMTE_S_PM_INSN | MMTE_U_PM_INSN);
> +env->mmte = wpri_val | PM_EXT_DIRTY;
> +
> +/* Set XS and SD bits, since PM CSRs are dirty */
> +mstatus = env->mstatus | MSTATUS_XS;
> +write_mstatus(env, csrno, mstatus);
> +return RISCV_EXCP_NONE;
> +}
> +
> +static RISCVException read_smte(CPURISCVState *env, int csrno,
> +target_ulong *val)
> +{
> +*val = env->mmte & SMTE_MASK;
> +return RISCV_EXCP_NONE;
> +}
> +
> +static RISCVException write_smte(CPURISCVState *env, int csrno,
> + target_ulong val)
> +{
> +target_ulong wpri_val = val & SMTE_MASK;
> +if (val != wpri_val) {
> +qemu_log_mask(LOG_GUEST_ERROR,
> +  "SMTE: WPRI violation

Re: [PATCH RFC v2 11/16] vfio-user: get and set IRQs

2021-09-08 Thread John Johnson



> On Sep 7, 2021, at 8:14 AM, Stefan Hajnoczi  wrote:
> 
> On Mon, Aug 16, 2021 at 09:42:44AM -0700, Elena Ufimtseva wrote:
>> From: John Johnson 
>> 
>> Signed-off-by: Elena Ufimtseva 
>> Signed-off-by: John G Johnson 
>> Signed-off-by: Jagannathan Raman 
>> ---
>> hw/vfio/user-protocol.h |  25 ++
>> hw/vfio/user.h  |   2 +
>> hw/vfio/common.c|  26 --
>> hw/vfio/pci.c   |  31 ++--
>> hw/vfio/user.c  | 106 
>> 5 files changed, 181 insertions(+), 9 deletions(-)
>> 
>> diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h
>> index 56904cf872..5614efa0a4 100644
>> --- a/hw/vfio/user-protocol.h
>> +++ b/hw/vfio/user-protocol.h
>> @@ -109,6 +109,31 @@ typedef struct {
>> uint64_t offset;
>> } VFIOUserRegionInfo;
>> 
>> +/*
>> + * VFIO_USER_DEVICE_GET_IRQ_INFO
>> + * imported from struct vfio_irq_info
>> + */
>> +typedef struct {
>> +VFIOUserHdr hdr;
>> +uint32_t argsz;
>> +uint32_t flags;
>> +uint32_t index;
>> +uint32_t count;
>> +} VFIOUserIRQInfo;
>> +
>> +/*
>> + * VFIO_USER_DEVICE_SET_IRQS
>> + * imported from struct vfio_irq_set
>> + */
>> +typedef struct {
>> +VFIOUserHdr hdr;
>> +uint32_t argsz;
>> +uint32_t flags;
>> +uint32_t index;
>> +uint32_t start;
>> +uint32_t count;
>> +} VFIOUserIRQSet;
>> +
>> /*
>>  * VFIO_USER_REGION_READ
>>  * VFIO_USER_REGION_WRITE
>> diff --git a/hw/vfio/user.h b/hw/vfio/user.h
>> index 02f832a173..248ad80943 100644
>> --- a/hw/vfio/user.h
>> +++ b/hw/vfio/user.h
>> @@ -74,6 +74,8 @@ int vfio_user_validate_version(VFIODevice *vbasedev, Error 
>> **errp);
>> int vfio_user_get_info(VFIODevice *vbasedev);
>> int vfio_user_get_region_info(VFIODevice *vbasedev, int index,
>>   struct vfio_region_info *info, VFIOUserFDs 
>> *fds);
>> +int vfio_user_get_irq_info(VFIODevice *vbasedev, struct vfio_irq_info 
>> *info);
>> +int vfio_user_set_irqs(VFIODevice *vbasedev, struct vfio_irq_set *irq);
>> int vfio_user_region_read(VFIODevice *vbasedev, uint32_t index, uint64_t 
>> offset,
>>   uint32_t count, void *data);
>> int vfio_user_region_write(VFIODevice *vbasedev, uint32_t index,
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index a8b1ea9358..9fe3e05dc6 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -71,7 +71,11 @@ void vfio_disable_irqindex(VFIODevice *vbasedev, int 
>> index)
>> .count = 0,
>> };
>> 
>> -ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set);
>> +if (vbasedev->proxy != NULL) {
>> +vfio_user_set_irqs(vbasedev, _set);
>> +} else {
>> +ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set);
>> +}
>> }
>> 
>> void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index)
>> @@ -84,7 +88,11 @@ void vfio_unmask_single_irqindex(VFIODevice *vbasedev, 
>> int index)
>> .count = 1,
>> };
>> 
>> -ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set);
>> +if (vbasedev->proxy != NULL) {
>> +vfio_user_set_irqs(vbasedev, _set);
>> +} else {
>> +ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set);
>> +}
>> }
>> 
>> void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index)
>> @@ -97,7 +105,11 @@ void vfio_mask_single_irqindex(VFIODevice *vbasedev, int 
>> index)
>> .count = 1,
>> };
>> 
>> -ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set);
>> +if (vbasedev->proxy != NULL) {
>> +vfio_user_set_irqs(vbasedev, _set);
>> +} else {
>> +ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, _set);
>> +}
>> }
>> 
>> static inline const char *action_to_str(int action)
>> @@ -178,8 +190,12 @@ int vfio_set_irq_signaling(VFIODevice *vbasedev, int 
>> index, int subindex,
>> pfd = (int32_t *)_set->data;
>> *pfd = fd;
>> 
>> -if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
>> -ret = -errno;
>> +if (vbasedev->proxy != NULL) {
>> +ret = vfio_user_set_irqs(vbasedev, irq_set);
>> +} else {
>> +if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
>> +ret = -errno;
>> +}
>> }
>> g_free(irq_set);
>> 
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index ea0df8be65..282de6a30b 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -403,7 +403,11 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, 
>> bool msix)
>> fds[i] = fd;
>> }
>> 
>> -ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
>> +if (vdev->vbasedev.proxy != NULL) {
>> +ret = vfio_user_set_irqs(>vbasedev, irq_set);
>> +} else {
>> +ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
>> +}
>> 
>> g_free(irq_set);
>> 
>> @@ -2675,7 +2679,13 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, 
>> Error **errp)
>> 
>> irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
>> 
>> -ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, _info);
>> +

Re: [PATCH RFC v2 08/16] vfio-user: get region info

2021-09-08 Thread John Johnson



> On Sep 7, 2021, at 7:31 AM, Stefan Hajnoczi  wrote:
> 
> On Mon, Aug 16, 2021 at 09:42:41AM -0700, Elena Ufimtseva wrote:
>> @@ -1514,6 +1515,16 @@ bool vfio_get_info_dma_avail(struct 
>> vfio_iommu_type1_info *info,
>> return true;
>> }
>> 
>> +static int vfio_get_region_info_remfd(VFIODevice *vbasedev, int index)
>> +{
>> +struct vfio_region_info *info;
>> +
>> +if (vbasedev->regions == NULL || vbasedev->regions[index] == NULL) {
>> +vfio_get_region_info(vbasedev, index, );
>> +}
> 
> Maybe this will be called from other places in the future, but the
> vfio_region_setup() caller below already invoked vfio_get_region_info()
> so I'm not sure it's necessary to do this again?
> 
> Perhaps add an int *remfd argument to vfio_get_region_info(). That way
> vfio_get_region_info_remfd() isn't necessary.
> 

I think they could be combined, but the region capabilities are
retrieved with separate calls to vfio_get_region_info_cap(), so I followed
that precedent.


>> @@ -2410,6 +2442,24 @@ int vfio_get_region_info(VFIODevice *vbasedev, int 
>> index,
>>  struct vfio_region_info **info)
>> {
>> size_t argsz = sizeof(struct vfio_region_info);
>> +int fd = -1;
>> +int ret;
>> +
>> +/* create region cache */
>> +if (vbasedev->regions == NULL) {
>> +vbasedev->regions = g_new0(struct vfio_region_info *,
>> +   vbasedev->num_regions);
>> +if (vbasedev->proxy != NULL) {
>> +vbasedev->regfds = g_new0(int, vbasedev->num_regions);
>> +}
>> +}
>> +/* check cache */
>> +if (vbasedev->regions[index] != NULL) {
>> +*info = g_malloc0(vbasedev->regions[index]->argsz);
>> +memcpy(*info, vbasedev->regions[index],
>> +   vbasedev->regions[index]->argsz);
>> +return 0;
>> +}
> 
> Why is it necessary to introduce a cache? Is it to avoid passing the
> same fd multiple times?
> 

Yes.  For polled regions, the server returns an FD with each
message, so there would be an FD leak if I didn’t check that the region
hadn’t already returned one.  Since I had to cache the FD anyway, I
cached the whole struct.


>> 
>> *info = g_malloc0(argsz);
>> 
>> @@ -2417,7 +2467,17 @@ int vfio_get_region_info(VFIODevice *vbasedev, int 
>> index,
>> retry:
>> (*info)->argsz = argsz;
>> 
>> -if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
>> +if (vbasedev->proxy != NULL) {
>> +VFIOUserFDs fds = { 0, 1, };
>> +
>> +ret = vfio_user_get_region_info(vbasedev, index, *info, );
>> +} else {
>> +ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info);
>> +if (ret < 0) {
>> +ret = -errno;
>> +}
>> +}
>> +if (ret != 0) {
>> g_free(*info);
>> *info = NULL;
>> return -errno;
>> @@ -2426,10 +2486,22 @@ retry:
>> if ((*info)->argsz > argsz) {
>> argsz = (*info)->argsz;
>> *info = g_realloc(*info, argsz);
>> +if (fd != -1) {
>> +close(fd);
>> +fd = -1;
>> +}
>> 
>> goto retry;
>> }
>> 
>> +/* fill cache */
>> +vbasedev->regions[index] = g_malloc0(argsz);
>> +memcpy(vbasedev->regions[index], *info, argsz);
>> +*vbasedev->regions[index] = **info;
> 
> The previous line already copied the contents of *info. What is the
> purpose of this assignment?
> 

That might be a mis-merge.  The struct assignment was a bug
fixed several revs ago with the memcpy() call.


>> +if (vbasedev->regfds != NULL) {
>> +vbasedev->regfds[index] = fd;
>> +}
>> +
>> return 0;
>> }
>> 
>> diff --git a/hw/vfio/user.c b/hw/vfio/user.c
>> index b584b8e0f2..91b51f37df 100644
>> --- a/hw/vfio/user.c
>> +++ b/hw/vfio/user.c
>> @@ -734,3 +734,36 @@ int vfio_user_get_info(VFIODevice *vbasedev)
>> vbasedev->reset_works = !!(msg.flags & VFIO_DEVICE_FLAGS_RESET);
>> return 0;
>> }
>> +
>> +int vfio_user_get_region_info(VFIODevice *vbasedev, int index,
>> +  struct vfio_region_info *info, VFIOUserFDs 
>> *fds)
>> +{
>> +g_autofree VFIOUserRegionInfo *msgp = NULL;
>> +int size;
> 
> Please use uint32_t size instead of int size to prevent possible
> signedness issues:
> - VFIOUserRegionInfo->argsz is uint32_t.
> - sizeof(VFIOUserHdr) is size_t.
> - The vfio_user_request_msg() size argument is uint32_t.

OK

JJ

Re: [PATCH RFC v2 04/16] vfio-user: connect vfio proxy to remote server

2021-09-08 Thread John Johnson

> On Sep 7, 2021, at 6:21 AM, Stefan Hajnoczi  wrote:
> 
> 
> This way the network communication code doesn't need to know how
> messages will by processed by the client or server. There is no need for
> if (isreply) { qemu_cond_signal(>cv); } else {
> proxy->request(proxy->reqarg, buf, ); }. The callbacks and
> threads aren't hardcoded into the network communication code.
> 

I fear we are talking past each other.  The vfio-user protocol
is bi-directional.  e.g., the client both sends requests to the server
and receives requests from the server on the same socket.  No matter
what threading model we use, the receive algorithm will be:

read message header
if it’s a reply
   schedule the thread waiting for the reply
else
   run a callback to process the request

The only way I can see changing this is to establish two
uni-directional sockets: one for requests outbound to the server,
and one for requests inbound from the server.

This is the reason I chose the iothread model.  It can run
independently of any vCPU/main threads waiting for replies and of
the callback thread.  I did muddle this idea by having the iothread
become a callback thread by grabbing BQL and running the callback
inline when it receives a request from the server, but if you like a
pure event driven model, I can make incoming requests kick a BH from
the main loop.  e.g.,

if it’s a reply
   qemu_cond_signal(reply cv)
else
   qemu_bh_schedule(proxy bh)

That would avoid disconnect having to handle the iothread
blocked on BQL.

> This goes back to the question earlier about why a dedicated thread is
> necessary here. I suggest writing the network communication code using
> coroutines. That way the code is easier to read (no callbacks or
> thread synchronization), there are fewer thread-safety issues to worry
> about, and users or management tools don't need to know about additional
> threads (e.g. CPU/NUMA affinity).
> 

I did look at coroutines, but they seemed to work when the sender
is triggering the coroutine on send, not when request packets are arriving
asynchronously to the sends.

JJ

Re: [PATCH v1 2/3] io: Add zerocopy and errqueue

2021-09-08 Thread Leonardo Bras Soares Passos

On Wed, Sep 8, 2021 at 11:05 PM Peter Xu  wrote:
>
> On Wed, Sep 08, 2021 at 10:57:06PM +0100, Daniel P. Berrangé wrote:
> > We think we're probably ok with migration as we are going to rely on the
> > face that we eventually pause the guest to stop page changes during the
> > final switchover. None the less I really strongly dislike the idea of
> > not honouring the kernel API contract, despite the potential performance
> > benefits it brings.
>
> Yes understandable, and it does looks tricky. But it's guest page and it's 
> just
> by nature how it works to me (sending page happening in parallel with page
> being modified).
>
> I think the MSG_ZEROCOPY doc page mentioned that and it's userspace program's
> own choice if that happens. So even if it's not by design and not suggested, 
> it
> seems not forbidden either.
>
> We can wr-protect the page (using things like userfaultfd-wp) during sending
> and unprotect them when it's done with a qio callback, that'll guarantee the
> buffer not changing during sending, however we gain nothing besides the "api
> cleaness" then..
>
> Thanks,
>
> --
> Peter Xu
>

I can't stress enough how inexperienced I am in qemu codebase, but based on
the current discussion and my humble opinion, it would make sense if something
like an async_writev + async_flush method (or a callback instead) would be
offered by QIOChannel, and let the responsibility of "which flags to use",
"when to lock", and "how / when to flush" to the codebase using it.

I mean, it's clear how the sync requirements will be very different
depending on what
the using code will be sending with it, so It makes sense to me that
we offer the tool
and let it decide how it should be used (and possibly deal with any
consequences).

Is there anything that could go wrong with the above that I am missing?

About the callback proposed, I am not sure how that would work in an
efficient way.
Could someone help me with that?

FWIW, what I had in mind for a (theoretical) migration setup with
io_async_writev() + io_async_flush():
- For guest RAM we can decide not to rw_lock memory on zerocopy,
because there is no need,
- For headers, we can decide to not use async  (use io_writev() instead),
- flush() can happen each iteration of migration, or at each N
seconds, or at the end.

Thank you for the great discussion :)
Leonardo Bras

Re: [PATCH 0/5] Stop adding HMP-only commands, allow QMP for all

2021-09-08 Thread Markus Armbruster

Daniel P. Berrangé  writes:

> On Wed, Sep 08, 2021 at 05:09:13PM +0200, Markus Armbruster wrote:
>> Daniel P. Berrangé  writes:
>> 
>> > We are still adding HMP commands without any QMP counterparts. This is
>> > done because there are a reasonable number of scenarios where the cost
>> > of designing a QAPI data type for the command is not justified.
>> >
>> > This has the downside, however, that we will never be able to fully
>> > isolate the monitor code from the remainder of QEMU internals. It is
>> > desirable to be able to get to a point where subsystems in QEMU are
>> > exclusively implemented using QAPI types and never need to have any
>> > knowledge of the monitor APIs.
>> >
>> > The way to get there is to stop adding commands to HMP only. All
>> > commands must be implemented using QMP, and any HMP implementation
>> > be a shim around the QMP implementation.
>> >
>> > We don't want to compromise our supportability of QMP long term though.
>> >
>> > This series proposes that we relax our requirements around fine grained
>> > QAPI data design,
>> 
>> Specifics?  QMP command returns a string, HMP wrapper prints that
>> string?
>
> yes, a command returning a single opaque printf formatted string would
> be the common case.  At a more general POV though, the JSON doc received
> by the client should be usable "as received", immediately after JSON
> de-serialization without needing any further custom parsing on top.
>
> ie if a value needs to be parsed by the client, then it must be split
> into multiple distinct values in the QAPI data type design to remove
> the need for parsing by the client. 

Yes, that's QMP doctrine.

> If a command's design violates that, then it must remain under the
> "x-" prefix.  "info registers" is a example because we're printf
> formatting each register value into a opaque string. Any client
> needing a specific register value would have to scanf parse this
> string. The justification for not representing each register as
> a distinct QAPI field is that we don't think machines genuinely
> need to parse this, as its just adhoc human operator debug info.
> So we take the easy option and just printf to a string and put
> it under "x-" prefix

Got it.

Limitations:

1. If we convert a long-running HMP command to this technique, we print
   its output only after it completed its work.  We also end up with a
   long-running QMP command, which is bad, because it stops the main
   loop and makes the QMP monitor unresponsive (except for OOB commands,
   if the client is careful).  The former can be mitigated with
   'coroutine': true.  The latter can't.

2. We can't prompt for input.

   The only current use I can see is HMP "change vnc passwd" prompting
   for a password.  Except you currently have to say "change vnc passwd
   wtf" to get it to prompt (suspect logic error in commit cfb5387a1de).


[...]

Re: [PATCH v3 2/2] sifive_u: Connect the SiFive PWM device

2021-09-08 Thread Bin Meng

On Thu, Sep 9, 2021 at 11:55 AM Alistair Francis
 wrote:
>
> From: Alistair Francis 
>
> Connect the SiFive PWM device and expose it via the device tree.
>
> Signed-off-by: Alistair Francis 
> ---
>  docs/system/riscv/sifive_u.rst |  1 +
>  include/hw/riscv/sifive_u.h| 14 -
>  hw/riscv/sifive_u.c| 55 +-
>  hw/timer/sifive_pwm.c  |  1 +
>  hw/riscv/Kconfig   |  1 +
>  5 files changed, 70 insertions(+), 2 deletions(-)
>
> diff --git a/docs/system/riscv/sifive_u.rst b/docs/system/riscv/sifive_u.rst
> index 01108b5ecc..7c65e9c440 100644
> --- a/docs/system/riscv/sifive_u.rst
> +++ b/docs/system/riscv/sifive_u.rst
> @@ -24,6 +24,7 @@ The ``sifive_u`` machine supports the following devices:
>  * 2 QSPI controllers
>  * 1 ISSI 25WP256 flash
>  * 1 SD card in SPI mode
> +* PWM0 and PWM1
>
>  Please note the real world HiFive Unleashed board has a fixed configuration 
> of
>  1 E51 core and 4 U54 core combination and the RISC-V core boots in 64-bit 
> mode.
> diff --git a/include/hw/riscv/sifive_u.h b/include/hw/riscv/sifive_u.h
> index 2656b39808..f71c90c94c 100644
> --- a/include/hw/riscv/sifive_u.h
> +++ b/include/hw/riscv/sifive_u.h
> @@ -27,6 +27,7 @@
>  #include "hw/misc/sifive_u_otp.h"
>  #include "hw/misc/sifive_u_prci.h"
>  #include "hw/ssi/sifive_spi.h"
> +#include "hw/timer/sifive_pwm.h"
>
>  #define TYPE_RISCV_U_SOC "riscv.sifive.u.soc"
>  #define RISCV_U_SOC(obj) \
> @@ -49,6 +50,7 @@ typedef struct SiFiveUSoCState {
>  SiFiveSPIState spi0;
>  SiFiveSPIState spi2;
>  CadenceGEMState gem;
> +SiFivePwmState pwm[2];
>
>  uint32_t serial;
>  char *cpu_type;
> @@ -92,7 +94,9 @@ enum {
>  SIFIVE_U_DEV_FLASH0,
>  SIFIVE_U_DEV_DRAM,
>  SIFIVE_U_DEV_GEM,
> -SIFIVE_U_DEV_GEM_MGMT
> +SIFIVE_U_DEV_GEM_MGMT,
> +SIFIVE_U_DEV_PWM0,
> +SIFIVE_U_DEV_PWM1
>  };
>
>  enum {
> @@ -126,6 +130,14 @@ enum {
>  SIFIVE_U_PDMA_IRQ5 = 28,
>  SIFIVE_U_PDMA_IRQ6 = 29,
>  SIFIVE_U_PDMA_IRQ7 = 30,
> +SIFIVE_U_PWM0_IRQ0 = 42,
> +SIFIVE_U_PWM0_IRQ1 = 43,
> +SIFIVE_U_PWM0_IRQ2 = 44,
> +SIFIVE_U_PWM0_IRQ3 = 45,
> +SIFIVE_U_PWM1_IRQ0 = 46,
> +SIFIVE_U_PWM1_IRQ1 = 47,
> +SIFIVE_U_PWM1_IRQ2 = 48,
> +SIFIVE_U_PWM1_IRQ3 = 49,
>  SIFIVE_U_QSPI0_IRQ = 51,
>  SIFIVE_U_GEM_IRQ = 53
>  };
> diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
> index 6cc1a62b0f..2301f5dd4f 100644
> --- a/hw/riscv/sifive_u.c
> +++ b/hw/riscv/sifive_u.c
> @@ -17,6 +17,7 @@
>   * 7) DMA (Direct Memory Access Controller)
>   * 8) SPI0 connected to an SPI flash
>   * 9) SPI2 connected to an SD card
> + * 10) PWM0 and PWM1
>   *
>   * This board currently generates devicetree dynamically that indicates at 
> least
>   * two harts and up to five harts.
> @@ -75,6 +76,8 @@ static const MemMapEntry sifive_u_memmap[] = {
>  [SIFIVE_U_DEV_PRCI] = { 0x1000, 0x1000 },
>  [SIFIVE_U_DEV_UART0] ={ 0x1001, 0x1000 },
>  [SIFIVE_U_DEV_UART1] ={ 0x10011000, 0x1000 },
> +[SIFIVE_U_DEV_PWM0] = { 0x1002, 0x1000 },
> +[SIFIVE_U_DEV_PWM1] = { 0x10021000, 0x1000 },
>  [SIFIVE_U_DEV_QSPI0] ={ 0x1004, 0x1000 },
>  [SIFIVE_U_DEV_QSPI2] ={ 0x1005, 0x1000 },
>  [SIFIVE_U_DEV_GPIO] = { 0x1006, 0x1000 },
> @@ -441,6 +444,38 @@ static void create_fdt(SiFiveUState *s, const 
> MemMapEntry *memmap,
>  qemu_fdt_setprop_cell(fdt, nodename, "reg", 0x0);
>  g_free(nodename);
>
> +nodename = g_strdup_printf("/soc/pwm@%lx",
> +(long)memmap[SIFIVE_U_DEV_PWM0].base);
> +qemu_fdt_add_subnode(fdt, nodename);
> +qemu_fdt_setprop_string(fdt, nodename, "compatible", "sifive,pwm0");
> +qemu_fdt_setprop_cells(fdt, nodename, "reg",
> +0x0, memmap[SIFIVE_U_DEV_PWM0].base,
> +0x0, memmap[SIFIVE_U_DEV_PWM0].size);
> +qemu_fdt_setprop_cell(fdt, nodename, "interrupt-parent", plic_phandle);
> +qemu_fdt_setprop_cells(fdt, nodename, "interrupts",
> +   SIFIVE_U_PWM0_IRQ0, SIFIVE_U_PWM0_IRQ1,
> +   SIFIVE_U_PWM0_IRQ2, SIFIVE_U_PWM0_IRQ3);
> +qemu_fdt_setprop_cells(fdt, nodename, "clocks",
> +   prci_phandle, PRCI_CLK_TLCLK);
> +qemu_fdt_setprop_cell(fdt, nodename, "#pwm-cells", 0);
> +g_free(nodename);
> +
> +nodename = g_strdup_printf("/soc/pwm@%lx",
> +(long)memmap[SIFIVE_U_DEV_PWM1].base);
> +qemu_fdt_add_subnode(fdt, nodename);
> +qemu_fdt_setprop_string(fdt, nodename, "compatible", "sifive,pwm0");
> +qemu_fdt_setprop_cells(fdt, nodename, "reg",
> +0x0, memmap[SIFIVE_U_DEV_PWM1].base,
> +0x0, memmap[SIFIVE_U_DEV_PWM1].size);
> +qemu_fdt_setprop_cell(fdt, nodename, "interrupt-parent", plic_phandle);
> +qemu_fdt_setprop_cells(fdt, nodename, "interrupts",
> +   SIFIVE_U_PWM1_IRQ0, SIFIVE_U_PWM1_IRQ1,
> +

Re: [PATCH v3 1/2] hw/timer: Add SiFive PWM support

2021-09-08 Thread Bin Meng

On Thu, Sep 9, 2021 at 11:55 AM Alistair Francis
 wrote:
>
> From: Alistair Francis 
>
> This is the initial commit of the SiFive PWM timer. This is used by
> guest software as a timer and is included in the SiFive FU540 SoC.
>
> Signed-off-by: Justin Restivo 
> Signed-off-by: Alexandra Clifford 
> Signed-off-by: Amanda Strnad 
> Signed-off-by: Alistair Francis 
> ---
>  include/hw/timer/sifive_pwm.h |  62 +
>  hw/timer/sifive_pwm.c | 467 ++
>  hw/timer/Kconfig  |   3 +
>  hw/timer/meson.build  |   1 +
>  hw/timer/trace-events |   6 +
>  5 files changed, 539 insertions(+)
>  create mode 100644 include/hw/timer/sifive_pwm.h
>  create mode 100644 hw/timer/sifive_pwm.c
>

Reviewed-by: Bin Meng

Re: [PATCH v10 2/7] [RISCV_PM] Add CSR defines for RISC-V PM extension

2021-09-08 Thread Alistair Francis

On Mon, Aug 30, 2021 at 3:51 AM Alexey Baturo  wrote:
>
> Signed-off-by: Alexey Baturo 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu_bits.h | 96 +
>  1 file changed, 96 insertions(+)
>
> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> index 7330ff5a19..140178d23c 100644
> --- a/target/riscv/cpu_bits.h
> +++ b/target/riscv/cpu_bits.h
> @@ -334,6 +334,38 @@
>  #define CSR_MHPMCOUNTER30H  0xb9e
>  #define CSR_MHPMCOUNTER31H  0xb9f
>
> +/*
> + * User PointerMasking registers
> + * NB: actual CSR numbers might be changed in future
> + */
> +#define CSR_UMTE0x4c0
> +#define CSR_UPMMASK 0x4c1
> +#define CSR_UPMBASE 0x4c2
> +
> +/*
> + * Machine PointerMasking registers
> + * NB: actual CSR numbers might be changed in future
> + */
> +#define CSR_MMTE0x3c0
> +#define CSR_MPMMASK 0x3c1
> +#define CSR_MPMBASE 0x3c2
> +
> +/*
> + * Supervisor PointerMaster registers
> + * NB: actual CSR numbers might be changed in future
> + */
> +#define CSR_SMTE0x1c0
> +#define CSR_SPMMASK 0x1c1
> +#define CSR_SPMBASE 0x1c2
> +
> +/*
> + * Hypervisor PointerMaster registers
> + * NB: actual CSR numbers might be changed in future
> + */
> +#define CSR_VSMTE   0x2c0
> +#define CSR_VSPMMASK0x2c1
> +#define CSR_VSPMBASE0x2c2
> +
>  /* mstatus CSR bits */
>  #define MSTATUS_UIE 0x0001
>  #define MSTATUS_SIE 0x0002
> @@ -531,4 +563,68 @@ typedef enum RISCVException {
>  #define MIE_UTIE   (1 << IRQ_U_TIMER)
>  #define MIE_SSIE   (1 << IRQ_S_SOFT)
>  #define MIE_USIE   (1 << IRQ_U_SOFT)
> +
> +/* General PointerMasking CSR bits*/
> +#define PM_ENABLE   0x0001ULL
> +#define PM_CURRENT  0x0002ULL
> +#define PM_INSN 0x0004ULL
> +#define PM_XS_MASK  0x0003ULL
> +
> +/* PointerMasking XS bits values */
> +#define PM_EXT_DISABLE  0xULL
> +#define PM_EXT_INITIAL  0x0001ULL
> +#define PM_EXT_CLEAN0x0002ULL
> +#define PM_EXT_DIRTY0x0003ULL
> +
> +/* Offsets for every pair of control bits per each priv level */
> +#define XS_OFFSET0ULL
> +#define U_OFFSET 2ULL
> +#define S_OFFSET 5ULL
> +#define M_OFFSET 8ULL
> +
> +#define PM_XS_BITS   (PM_XS_MASK << XS_OFFSET)
> +#define U_PM_ENABLE  (PM_ENABLE  << U_OFFSET)
> +#define U_PM_CURRENT (PM_CURRENT << U_OFFSET)
> +#define U_PM_INSN(PM_INSN<< U_OFFSET)
> +#define S_PM_ENABLE  (PM_ENABLE  << S_OFFSET)
> +#define S_PM_CURRENT (PM_CURRENT << S_OFFSET)
> +#define S_PM_INSN(PM_INSN<< S_OFFSET)
> +#define M_PM_ENABLE  (PM_ENABLE  << M_OFFSET)
> +#define M_PM_CURRENT (PM_CURRENT << M_OFFSET)
> +#define M_PM_INSN(PM_INSN<< M_OFFSET)
> +
> +/* mmte CSR bits */
> +#define MMTE_PM_XS_BITS PM_XS_BITS
> +#define MMTE_U_PM_ENABLEU_PM_ENABLE
> +#define MMTE_U_PM_CURRENT   U_PM_CURRENT
> +#define MMTE_U_PM_INSN  U_PM_INSN
> +#define MMTE_S_PM_ENABLES_PM_ENABLE
> +#define MMTE_S_PM_CURRENT   S_PM_CURRENT
> +#define MMTE_S_PM_INSN  S_PM_INSN
> +#define MMTE_M_PM_ENABLEM_PM_ENABLE
> +#define MMTE_M_PM_CURRENT   M_PM_CURRENT
> +#define MMTE_M_PM_INSN  M_PM_INSN
> +#define MMTE_MASK(MMTE_U_PM_ENABLE | MMTE_U_PM_CURRENT | MMTE_U_PM_INSN 
> | \
> +  MMTE_S_PM_ENABLE | MMTE_S_PM_CURRENT | MMTE_S_PM_INSN 
> | \
> +  MMTE_M_PM_ENABLE | MMTE_M_PM_CURRENT | MMTE_M_PM_INSN 
> | \
> +  MMTE_PM_XS_BITS)
> +
> +/* (v)smte CSR bits */
> +#define SMTE_PM_XS_BITS PM_XS_BITS
> +#define SMTE_U_PM_ENABLEU_PM_ENABLE
> +#define SMTE_U_PM_CURRENT   U_PM_CURRENT
> +#define SMTE_U_PM_INSN  U_PM_INSN
> +#define SMTE_S_PM_ENABLES_PM_ENABLE
> +#define SMTE_S_PM_CURRENT   S_PM_CURRENT
> +#define SMTE_S_PM_INSN  S_PM_INSN
> +#define SMTE_MASK(SMTE_U_PM_ENABLE | SMTE_U_PM_CURRENT | SMTE_U_PM_INSN 
> | \
> +  SMTE_S_PM_ENABLE | SMTE_S_PM_CURRENT | SMTE_S_PM_INSN 
> | \
> +  SMTE_PM_XS_BITS)
> +
> +/* umte CSR bits */
> +#define UMTE_U_PM_ENABLEU_PM_ENABLE
> +#define UMTE_U_PM_CURRENT   U_PM_CURRENT
> +#define UMTE_U_PM_INSN  U_PM_INSN
> +#define UMTE_MASK (UMTE_U_PM_ENABLE | MMTE_U_PM_CURRENT | UMTE_U_PM_INSN)
> +
>  #endif
> --
> 2.20.1
>
>

Re: [PATCH] hw/i386/acpi-build: Fix a typo

2021-09-08 Thread Ani Sinha



On Thu, 9 Sep 2021, Philippe Mathieu-Daudé wrote:

> Fix 'hotplugabble' -> 'hotpluggabble' typo.
>
> Signed-off-by: Philippe Mathieu-Daudé 

Reviewed-by: Ani Sinha 

> ---
>  hw/i386/acpi-build.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index d1f5fa3b5a5..478263e12c9 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -1916,7 +1916,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
> MachineState *machine)
>  PCMachineState *pcms = PC_MACHINE(machine);
>  int nb_numa_nodes = machine->numa_state->num_nodes;
>  NodeInfo *numa_info = machine->numa_state->nodes;
> -ram_addr_t hotplugabble_address_space_size =
> +ram_addr_t hotpluggabble_address_space_size =
>  object_property_get_int(OBJECT(pcms), PC_MACHINE_DEVMEM_REGION_SIZE,
>  NULL);
>
> @@ -2022,10 +2022,10 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
> MachineState *machine)
>   * Memory devices may override proximity set by this entry,
>   * providing _PXM method if necessary.
>   */
> -if (hotplugabble_address_space_size) {
> +if (hotpluggabble_address_space_size) {
>  numamem = acpi_data_push(table_data, sizeof *numamem);
>  build_srat_memory(numamem, machine->device_memory->base,
> -  hotplugabble_address_space_size, nb_numa_nodes - 1,
> +  hotpluggabble_address_space_size, nb_numa_nodes - 
> 1,
>MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
>  }
>
> --
> 2.31.1
>
>

[PATCH v3 1/2] hw/timer: Add SiFive PWM support

2021-09-08 Thread Alistair Francis

From: Alistair Francis 

This is the initial commit of the SiFive PWM timer. This is used by
guest software as a timer and is included in the SiFive FU540 SoC.

Signed-off-by: Justin Restivo 
Signed-off-by: Alexandra Clifford 
Signed-off-by: Amanda Strnad 
Signed-off-by: Alistair Francis 
---
 include/hw/timer/sifive_pwm.h |  62 +
 hw/timer/sifive_pwm.c | 467 ++
 hw/timer/Kconfig  |   3 +
 hw/timer/meson.build  |   1 +
 hw/timer/trace-events |   6 +
 5 files changed, 539 insertions(+)
 create mode 100644 include/hw/timer/sifive_pwm.h
 create mode 100644 hw/timer/sifive_pwm.c

diff --git a/include/hw/timer/sifive_pwm.h b/include/hw/timer/sifive_pwm.h
new file mode 100644
index 00..6a8cf7b29e
--- /dev/null
+++ b/include/hw/timer/sifive_pwm.h
@@ -0,0 +1,62 @@
+/*
+ * SiFive PWM
+ *
+ * Copyright (c) 2020 Western Digital
+ *
+ * Author:  Alistair Francis 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef HW_SIFIVE_PWM_H
+#define HW_SIFIVE_PWM_H
+
+#include "hw/sysbus.h"
+#include "qemu/timer.h"
+#include "qom/object.h"
+
+#define TYPE_SIFIVE_PWM "sifive-pwm"
+
+#define SIFIVE_PWM(obj) \
+OBJECT_CHECK(SiFivePwmState, (obj), TYPE_SIFIVE_PWM)
+
+#define SIFIVE_PWM_CHANS  4
+#define SIFIVE_PWM_IRQS   SIFIVE_PWM_CHANS
+
+typedef struct SiFivePwmState {
+/*  */
+SysBusDevice parent_obj;
+
+/*  */
+MemoryRegion mmio;
+QEMUTimer timer[SIFIVE_PWM_CHANS];
+/*
+ * if en bit(s) set, is the number of ticks when pwmcount was 0
+ * if en bit(s) not set, is the number of ticks in pwmcount
+ */
+uint64_t tick_offset;
+uint64_t freq_hz;
+
+uint32_t pwmcfg;
+uint32_t pwmcmp[SIFIVE_PWM_CHANS];
+
+qemu_irq irqs[SIFIVE_PWM_IRQS];
+} SiFivePwmState;
+
+#endif /* HW_SIFIVE_PWM_H */
diff --git a/hw/timer/sifive_pwm.c b/hw/timer/sifive_pwm.c
new file mode 100644
index 00..5565ff812a
--- /dev/null
+++ b/hw/timer/sifive_pwm.c
@@ -0,0 +1,467 @@
+/*
+ * SiFive PWM
+ *
+ * Copyright (c) 2020 Western Digital
+ *
+ * Author:  Alistair Francis 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/irq.h"
+#include "hw/timer/sifive_pwm.h"
+#include "hw/qdev-properties.h"
+#include "hw/registerfields.h"
+#include "migration/vmstate.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+
+#define HAS_PWM_EN_BITS(cfg) ((cfg & R_CONFIG_ENONESHOT_MASK) || \
+  (cfg & R_CONFIG_ENALWAYS_MASK))
+
+#define PWMCMP_MASK 0x
+#define PWMCOUNT_MASK 0x7FFF
+
+REG32(CONFIG,   0x00)
+FIELD(CONFIG, SCALE,0, 4)
+FIELD(CONFIG, STICKY,   8, 1)
+FIELD(CONFIG, ZEROCMP,  9, 1)
+FIELD(CONFIG, DEGLITCH, 10, 1)
+FIELD(CONFIG, ENALWAYS, 12, 1)
+FIELD(CONFIG, ENONESHOT,13, 1)
+

[PATCH v3 2/2] sifive_u: Connect the SiFive PWM device

2021-09-08 Thread Alistair Francis

From: Alistair Francis 

Connect the SiFive PWM device and expose it via the device tree.

Signed-off-by: Alistair Francis 
---
 docs/system/riscv/sifive_u.rst |  1 +
 include/hw/riscv/sifive_u.h| 14 -
 hw/riscv/sifive_u.c| 55 +-
 hw/timer/sifive_pwm.c  |  1 +
 hw/riscv/Kconfig   |  1 +
 5 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/docs/system/riscv/sifive_u.rst b/docs/system/riscv/sifive_u.rst
index 01108b5ecc..7c65e9c440 100644
--- a/docs/system/riscv/sifive_u.rst
+++ b/docs/system/riscv/sifive_u.rst
@@ -24,6 +24,7 @@ The ``sifive_u`` machine supports the following devices:
 * 2 QSPI controllers
 * 1 ISSI 25WP256 flash
 * 1 SD card in SPI mode
+* PWM0 and PWM1
 
 Please note the real world HiFive Unleashed board has a fixed configuration of
 1 E51 core and 4 U54 core combination and the RISC-V core boots in 64-bit mode.
diff --git a/include/hw/riscv/sifive_u.h b/include/hw/riscv/sifive_u.h
index 2656b39808..f71c90c94c 100644
--- a/include/hw/riscv/sifive_u.h
+++ b/include/hw/riscv/sifive_u.h
@@ -27,6 +27,7 @@
 #include "hw/misc/sifive_u_otp.h"
 #include "hw/misc/sifive_u_prci.h"
 #include "hw/ssi/sifive_spi.h"
+#include "hw/timer/sifive_pwm.h"
 
 #define TYPE_RISCV_U_SOC "riscv.sifive.u.soc"
 #define RISCV_U_SOC(obj) \
@@ -49,6 +50,7 @@ typedef struct SiFiveUSoCState {
 SiFiveSPIState spi0;
 SiFiveSPIState spi2;
 CadenceGEMState gem;
+SiFivePwmState pwm[2];
 
 uint32_t serial;
 char *cpu_type;
@@ -92,7 +94,9 @@ enum {
 SIFIVE_U_DEV_FLASH0,
 SIFIVE_U_DEV_DRAM,
 SIFIVE_U_DEV_GEM,
-SIFIVE_U_DEV_GEM_MGMT
+SIFIVE_U_DEV_GEM_MGMT,
+SIFIVE_U_DEV_PWM0,
+SIFIVE_U_DEV_PWM1
 };
 
 enum {
@@ -126,6 +130,14 @@ enum {
 SIFIVE_U_PDMA_IRQ5 = 28,
 SIFIVE_U_PDMA_IRQ6 = 29,
 SIFIVE_U_PDMA_IRQ7 = 30,
+SIFIVE_U_PWM0_IRQ0 = 42,
+SIFIVE_U_PWM0_IRQ1 = 43,
+SIFIVE_U_PWM0_IRQ2 = 44,
+SIFIVE_U_PWM0_IRQ3 = 45,
+SIFIVE_U_PWM1_IRQ0 = 46,
+SIFIVE_U_PWM1_IRQ1 = 47,
+SIFIVE_U_PWM1_IRQ2 = 48,
+SIFIVE_U_PWM1_IRQ3 = 49,
 SIFIVE_U_QSPI0_IRQ = 51,
 SIFIVE_U_GEM_IRQ = 53
 };
diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
index 6cc1a62b0f..2301f5dd4f 100644
--- a/hw/riscv/sifive_u.c
+++ b/hw/riscv/sifive_u.c
@@ -17,6 +17,7 @@
  * 7) DMA (Direct Memory Access Controller)
  * 8) SPI0 connected to an SPI flash
  * 9) SPI2 connected to an SD card
+ * 10) PWM0 and PWM1
  *
  * This board currently generates devicetree dynamically that indicates at 
least
  * two harts and up to five harts.
@@ -75,6 +76,8 @@ static const MemMapEntry sifive_u_memmap[] = {
 [SIFIVE_U_DEV_PRCI] = { 0x1000, 0x1000 },
 [SIFIVE_U_DEV_UART0] ={ 0x1001, 0x1000 },
 [SIFIVE_U_DEV_UART1] ={ 0x10011000, 0x1000 },
+[SIFIVE_U_DEV_PWM0] = { 0x1002, 0x1000 },
+[SIFIVE_U_DEV_PWM1] = { 0x10021000, 0x1000 },
 [SIFIVE_U_DEV_QSPI0] ={ 0x1004, 0x1000 },
 [SIFIVE_U_DEV_QSPI2] ={ 0x1005, 0x1000 },
 [SIFIVE_U_DEV_GPIO] = { 0x1006, 0x1000 },
@@ -441,6 +444,38 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry 
*memmap,
 qemu_fdt_setprop_cell(fdt, nodename, "reg", 0x0);
 g_free(nodename);
 
+nodename = g_strdup_printf("/soc/pwm@%lx",
+(long)memmap[SIFIVE_U_DEV_PWM0].base);
+qemu_fdt_add_subnode(fdt, nodename);
+qemu_fdt_setprop_string(fdt, nodename, "compatible", "sifive,pwm0");
+qemu_fdt_setprop_cells(fdt, nodename, "reg",
+0x0, memmap[SIFIVE_U_DEV_PWM0].base,
+0x0, memmap[SIFIVE_U_DEV_PWM0].size);
+qemu_fdt_setprop_cell(fdt, nodename, "interrupt-parent", plic_phandle);
+qemu_fdt_setprop_cells(fdt, nodename, "interrupts",
+   SIFIVE_U_PWM0_IRQ0, SIFIVE_U_PWM0_IRQ1,
+   SIFIVE_U_PWM0_IRQ2, SIFIVE_U_PWM0_IRQ3);
+qemu_fdt_setprop_cells(fdt, nodename, "clocks",
+   prci_phandle, PRCI_CLK_TLCLK);
+qemu_fdt_setprop_cell(fdt, nodename, "#pwm-cells", 0);
+g_free(nodename);
+
+nodename = g_strdup_printf("/soc/pwm@%lx",
+(long)memmap[SIFIVE_U_DEV_PWM1].base);
+qemu_fdt_add_subnode(fdt, nodename);
+qemu_fdt_setprop_string(fdt, nodename, "compatible", "sifive,pwm0");
+qemu_fdt_setprop_cells(fdt, nodename, "reg",
+0x0, memmap[SIFIVE_U_DEV_PWM1].base,
+0x0, memmap[SIFIVE_U_DEV_PWM1].size);
+qemu_fdt_setprop_cell(fdt, nodename, "interrupt-parent", plic_phandle);
+qemu_fdt_setprop_cells(fdt, nodename, "interrupts",
+   SIFIVE_U_PWM1_IRQ0, SIFIVE_U_PWM1_IRQ1,
+   SIFIVE_U_PWM1_IRQ2, SIFIVE_U_PWM1_IRQ3);
+qemu_fdt_setprop_cells(fdt, nodename, "clocks",
+   prci_phandle, PRCI_CLK_TLCLK);
+qemu_fdt_setprop_cell(fdt, nodename, "#pwm-cells", 0);
+g_free(nodename);
+
 nodename = g_strdup_printf("/soc/serial@%lx",

[PATCH v3 0/2] Add the SiFive PWM device

2021-09-08 Thread Alistair Francis

From: Alistair Francis 


This series adds a the SiFive PWM device and connects it to the
sifive_u machine. This has been tested as a timer with seL4.

v3:
 - Small fixups
v2:
 - Address Bin's comments
 - Expose PWM via the device tree


Alistair Francis (2):
  hw/timer: Add SiFive PWM support
  sifive_u: Connect the SiFive PWM device

 docs/system/riscv/sifive_u.rst |   1 +
 include/hw/riscv/sifive_u.h|  14 +-
 include/hw/timer/sifive_pwm.h  |  62 +
 hw/riscv/sifive_u.c|  55 +++-
 hw/timer/sifive_pwm.c  | 468 +
 hw/riscv/Kconfig   |   1 +
 hw/timer/Kconfig   |   3 +
 hw/timer/meson.build   |   1 +
 hw/timer/trace-events  |   6 +
 9 files changed, 609 insertions(+), 2 deletions(-)
 create mode 100644 include/hw/timer/sifive_pwm.h
 create mode 100644 hw/timer/sifive_pwm.c

-- 
2.31.1

RE: [PATCH] vmdk: allow specification of tools version

2021-09-08 Thread Weissschuh, Thomas [ext]

> -Original Message-
> From: Eric Blake 
> Sent: Wednesday, September 8, 2021 7:56 PM
> To: Weissschuh, Thomas [ext] 
> Cc: Fam Zheng ; Kevin Wolf ; Hanna Reitz
> ; Markus Armbruster ; qemu-
> bl...@nongnu.org; tho...@t-8ch.de; qemu-devel@nongnu.org
> Subject: Re: [PATCH] vmdk: allow specification of tools version
> 
> On Wed, Sep 08, 2021 at 07:42:50PM +0200, Thomas Weißschuh wrote:
> > VMDK files support an attribute that represents the version of the
> > guest tools that are installed on the disk.
> > This attribute is used by vSphere before a machine has been started to
> > determine if the VM has the guest tools installed.
> > This is important when configuring "Operating system customizations"
> > in vSphere, as it checks for the presence of the guest tools before
> > allowing those customizations.
> > Thus when the VM has not yet booted normally it would be impossible to
> > customize it, therefore preventing a customized first-boot.
> >
> > The attribute should not hurt on disks that do not have the guest
> > tools installed and indeed the VMware tools also unconditionally add
> > this attribute.
> > (Defaulting to the value "2147483647", as is done in this patch)
> >
> > Signed-off-by: Thomas Weißschuh 
> > ---
> >  block/vmdk.c | 24 
> >  qapi/block-core.json |  2 ++
> >  2 files changed, 22 insertions(+), 4 deletions(-)
> 
> UI review:
> 
> > +++ b/qapi/block-core.json
> > @@ -4597,6 +4597,7 @@
> >  # @adapter-type: The adapter type used to fill in the descriptor.
> Default: ide.
> >  # @hwversion: Hardware version. The meaningful options are "4" or "6".
> >  # Default: "4".
> > +# @toolsversion: VMware guest tools version.
> 
> Missing a '(since 6.2)' blurb, and a description of its default value.

I'll add that to v2, which I'll send as soon as some more review comments came 
in.

> >  # @zeroed-grain: Whether to enable zeroed-grain feature for sparse
> subformats.
> >  #Default: false.
> >  #
> > @@ -4610,6 +4611,7 @@
> >  '*backing-file':'str',
> >  '*adapter-type':'BlockdevVmdkAdapterType',
> >  '*hwversion':   'str',
> > +'*toolsversion':'str',
> 
> Is it an arbitrary string, or must a valid value always be parseable as a
> numeric value?  If the latter, then make the QMP representation numeric
> rather than string.

In the vmdk itself it is an arbitrary string but the actual values seem to be 
always numeric.
But I do have some very faint recollection of seeing normal version numbers 
(a.b.c) there at some point, but can't confirm this at the moment.

Thomas

Re: [PULL v4 13/43] vl: Add sgx compound properties to expose SGX EPC sections to guest

2021-09-08 Thread Yang Zhong

On Wed, Sep 08, 2021 at 09:52:40AM -0500, Eric Blake wrote:
> On Wed, Sep 08, 2021 at 12:03:56PM +0200, Paolo Bonzini wrote:
> > From: Sean Christopherson 
> > 
> > Because SGX EPC is enumerated through CPUID, EPC "devices" need to be
> > realized prior to realizing the vCPUs themselves, i.e. long before
> > generic devices are parsed and realized.  From a virtualization
> > perspective, the CPUID aspect also means that EPC sections cannot be
> > hotplugged without paravirtualizing the guest kernel (hardware does
> > not support hotplugging as EPC sections must be locked down during
> > pre-boot to provide EPC's security properties).
> > 
> 
> >  qapi/machine.json | 26 +++
> >  qemu-options.hx   | 10 --
> >  9 files changed, 166 insertions(+), 8 deletions(-)
> >  create mode 100644 hw/i386/sgx.c
> ...
> > +++ b/qapi/machine.json
> > @@ -1194,6 +1194,32 @@
> >}
> >  }
> >  
> > +##
> > +# @SgxEPC:
> > +#
> > +# Sgx EPC cmdline information
> > +#
> > +# @memdev: memory backend linked with device
> > +#
> > +# Since: 6.1
> 
> Another instance where we'll want the followup patch to correct things
> to 6.2.
> 
  
  Eric, i will cleanup this in the next version of 
https://patchew.org/QEMU/20210908081937.77254-1-yang.zh...@intel.com/.
  There is one special patch to do pure cleanup based on this PULL. thanks!

  Yang

Re: [PATCH] ebpf: only include in system emulators

2021-09-08 Thread Jason Wang

On Wed, Sep 8, 2021 at 1:46 PM Paolo Bonzini  wrote:
>
> On 08/09/21 05:08, Jason Wang wrote:
> >
> > 在 2021/9/7 下午6:45, Paolo Bonzini 写道:
> >> eBPF files are being included in system emulators, which is useless
> >
> >
> > I think it should work since it's an independent feature. The current
> > use case is to offload the RSS from Qemu to kernel TAP.
>
> Sorry, I meant "user emulators".  That should make more sense, they
> don't have TAP at all.
>
> Paolo
>

I see so I've queued this.

Thanks

Re: [PATCH] virtio-net: fix use after unmap/free for sg

2021-09-08 Thread Jason Wang




在 2021/9/2 下午1:44, Jason Wang 写道:

When mergeable buffer is enabled, we try to set the num_buffers after
the virtqueue elem has been unmapped. This will lead several issues,
E.g a use after free when the descriptor has an address which belongs
to the non direct access region. In this case we use bounce buffer
that is allocated during address_space_map() and freed during
address_space_unmap().

Fixing this by storing the elems temporarily in an array and delay the
unmap after we set the the num_buffers.

This addresses CVE-2021-3748.

Reported-by: Alexander Bulekov 
Fixes: fbe78f4f55c6 ("virtio-net support")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Jason Wang 



Applied.

Thanks



---
  hw/net/virtio-net.c | 39 ---
  1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 16d20cdee5..f205331dcf 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1746,10 +1746,13 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
*nc, const uint8_t *buf,
  VirtIONet *n = qemu_get_nic_opaque(nc);
  VirtIONetQueue *q = virtio_net_get_subqueue(nc);
  VirtIODevice *vdev = VIRTIO_DEVICE(n);
+VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
+size_t lens[VIRTQUEUE_MAX_SIZE];
  struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
  struct virtio_net_hdr_mrg_rxbuf mhdr;
  unsigned mhdr_cnt = 0;
-size_t offset, i, guest_offset;
+size_t offset, i, guest_offset, j;
+ssize_t err;
  
  if (!virtio_net_can_receive(nc)) {

  return -1;
@@ -1780,6 +1783,12 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
*nc, const uint8_t *buf,
  
  total = 0;
  
+if (i == VIRTQUEUE_MAX_SIZE) {

+virtio_error(vdev, "virtio-net unexpected long buffer chain");
+err = size;
+goto err;
+}
+
  elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
  if (!elem) {
  if (i) {
@@ -1791,7 +1800,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, 
const uint8_t *buf,
   n->guest_hdr_len, n->host_hdr_len,
   vdev->guest_features);
  }
-return -1;
+err = -1;
+goto err;
  }
  
  if (elem->in_num < 1) {

@@ -1799,7 +1809,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, 
const uint8_t *buf,
   "virtio-net receive queue contains no in buffers");
  virtqueue_detach_element(q->rx_vq, elem, 0);
  g_free(elem);
-return -1;
+err = -1;
+goto err;
  }
  
  sg = elem->in_sg;

@@ -1836,12 +1847,13 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
*nc, const uint8_t *buf,
  if (!n->mergeable_rx_bufs && offset < size) {
  virtqueue_unpop(q->rx_vq, elem, total);
  g_free(elem);
-return size;
+err = size;
+goto err;
  }
  
-/* signal other side */

-virtqueue_fill(q->rx_vq, elem, total, i++);
-g_free(elem);
+elems[i] = elem;
+lens[i] = total;
+i++;
  }
  
  if (mhdr_cnt) {

@@ -1851,10 +1863,23 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
*nc, const uint8_t *buf,
   _buffers, sizeof mhdr.num_buffers);
  }
  
+for (j = 0; j < i; j++) {

+/* signal other side */
+virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
+g_free(elems[j]);
+}
+
  virtqueue_flush(q->rx_vq, i);
  virtio_notify(vdev, q->rx_vq);
  
  return size;

+
+err:
+for (j = 0; j < i; j++) {
+g_free(elems[j]);
+}
+
+return err;
  }
  
  static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,

Re: [PATCH 5/7] qmp: Add the qmp_query_sgx_capabilities()

2021-09-08 Thread Yang Zhong

On Wed, Sep 08, 2021 at 10:38:59AM +0200, Philippe Mathieu-Daudé wrote:
> On 9/8/21 10:19 AM, Yang Zhong wrote:
> > Libvirt can use qmp_query_sgx_capabilities() to get the host
> > sgx capabilities.
> > 
> > Signed-off-by: Yang Zhong 
> > ---
> >  hw/i386/sgx.c  | 66 ++
> >  include/hw/i386/sgx.h  |  1 +
> >  qapi/misc-target.json  | 18 +++
> >  target/i386/monitor.c  |  5 +++
> >  tests/qtest/qmp-cmd-test.c |  1 +
> >  5 files changed, 91 insertions(+)
> 
> > +SGXInfo *sgx_get_capabilities(Error **errp)
> > +{
> > +SGXInfo *info = NULL;
> > +uint32_t eax, ebx, ecx, edx;
> > +
> > +int fd = qemu_open_old("/dev/sgx_vepc", O_RDWR);
> > +if (fd < 0) {
> > +error_setg(errp, "SGX is not enabled in KVM");
> > +return NULL;
> > +}
> 
> Is this Linux specific?

  Philippe, The /dev/sgx_vepc node is used for KVM side to expose the SGX
  EPC section to guest. Libvirt then use the '-machine none' qemu command 
  to query host SGX capabilities(especially for host SGX EPC section size)
  to decide how many SGX VMs will be started in server. If this node doesn't
  exist, the reason is host can't support SGX or SGX KVM module is not compiled
  in the kernel. thanks!

  Yang

Re: [PATCH 3/7] i386: Add sgx_get_info() interface

2021-09-08 Thread Yang Zhong

On Wed, Sep 08, 2021 at 10:32:27AM +0200, Philippe Mathieu-Daudé wrote:
> On 9/8/21 10:19 AM, Yang Zhong wrote:
> > Add the sgx_get_info() interface for hmp and QMP usage, which
> > will get the SGX info from this API.
> > 
> > Signed-off-by: Yang Zhong 
> > ---
> >  hw/i386/sgx.c | 21 +
> >  include/hw/i386/sgx.h | 11 +++
> >  target/i386/monitor.c | 32 
> >  3 files changed, 60 insertions(+), 4 deletions(-)
> >  create mode 100644 include/hw/i386/sgx.h
> 
> > @@ -766,12 +767,35 @@ qmp_query_sev_attestation_report(const char *mnonce, 
> > Error **errp)
> >  
> >  SGXInfo *qmp_query_sgx(Error **errp)
> >  {
> > -error_setg(errp, QERR_FEATURE_DISABLED, "query-sgx");
> 
> >  void hmp_info_sgx(Monitor *mon, const QDict *qdict)
> >  {
> > -error_setg(errp, QERR_FEATURE_DISABLED, "query-sgx");
> > -return NULL;
> 
> What is the point of patches #1 & #2? Why not squash all here?

  Philippe, The different user usage, Monitor and QMP tool to get the some info 
from VM.
  I am okay to squash those 3 patches into ones, thanks!

  Yang

Re: [PATCH] coroutine: resize pool periodically instead of limiting size

2021-09-08 Thread Daniele Buono


Stefan, the patch looks great.
Thank you for debugging the performance issue that was happening with
SafeStack.

On 9/2/2021 4:10 AM, Stefan Hajnoczi wrote:

On Wed, Sep 01, 2021 at 05:09:23PM +0100, Stefan Hajnoczi wrote:

It was reported that enabling SafeStack reduces IOPS significantly
(>25%) with the following fio benchmark on virtio-blk using a NVMe host
block device:

   # fio --rw=randrw --bs=4k --iodepth=64 --runtime=1m --direct=1 \
--filename=/dev/vdb --name=job1 --ioengine=libaio --thread \
--group_reporting --numjobs=16 --time_based \
 --output=/tmp/fio_result

Serge Guelton and I found that SafeStack is not really at fault, it just
increases the cost of coroutine creation. This fio workload exhausts the
coroutine pool and coroutine creation becomes a bottleneck. Previous
work by Honghao Wang also pointed to excessive coroutine creation.

Creating new coroutines is expensive due to allocating new stacks with
mmap(2) and mprotect(2). Currently there are thread-local and global
pools that recycle old Coroutine objects and their stacks but the
hardcoded size limit of 64 for thread-local pools and 128 for the global
pool is insufficient for the fio benchmark shown above.

This patch changes the coroutine pool algorithm to a simple thread-local
pool without a size limit. Threads periodically shrink the pool down to
a size sufficient for the maximum observed number of coroutines.

This is a very simple algorithm. Fancier things could be done like
keeping a minimum number of coroutines around to avoid latency when a
new coroutine is created after a long period of inactivity. Another
thought is to stop the timer when the pool size is zero for power saving
on threads that aren't using coroutines. However, I'd rather not add
bells and whistles unless they are really necessary.


I agree we should aim at something that is as simple as possible.

However keeping a minumum number of coroutines could be useful to avoid
performance regressions from the previous version.

I wouldn't say restore the global pool - that's way too much trouble -
but keeping the old "pool size" configuration parameter as the miniumum
pool size could be a good idea to maintain the previous performance in
cases where the dynamic pool shrinks too much.



The global pool is removed by this patch. It can help to hide the fact
that local pools are easily exhausted, but it's doesn't fix the root
cause. I don't think there is a need for a global pool because QEMU's
threads are long-lived, so let's keep things simple.

Performance of the above fio benchmark is as follows:

   Before   After
IOPS 60k 97k

Memory usage varies over time as needed by the workload:

 VSZ (KB) RSS (KB)
Before fio  4705248  843128
During fio  5747668 (+ ~100 MB)  849280
After fio   4694996 (- ~100 MB)  845184

This confirms that coroutines are indeed being freed when no longer
needed.

Thanks to Serge Guelton for working on identifying the bottleneck with
me!

Reported-by: Tingting Mao 
Cc: Serge Guelton 
Cc: Honghao Wang 
Cc: Paolo Bonzini 
Cc: Daniele Buono 
Signed-off-by: Stefan Hajnoczi 
---
  include/qemu/coroutine-pool-timer.h | 36 +
  include/qemu/coroutine.h|  7 
  iothread.c  |  6 +++
  util/coroutine-pool-timer.c | 35 
  util/main-loop.c|  5 +++
  util/qemu-coroutine.c   | 62 ++---
  util/meson.build|  1 +
  7 files changed, 119 insertions(+), 33 deletions(-)
  create mode 100644 include/qemu/coroutine-pool-timer.h
  create mode 100644 util/coroutine-pool-timer.c


Adding Andrew and Jenifer in case they have thoughts on improving QEMU's
coroutine pool algorithm.



diff --git a/include/qemu/coroutine-pool-timer.h 
b/include/qemu/coroutine-pool-timer.h
new file mode 100644
index 00..c0b520ce99
--- /dev/null
+++ b/include/qemu/coroutine-pool-timer.h
@@ -0,0 +1,36 @@
+/*
+ * QEMU coroutine pool timer
+ *
+ * Copyright (c) 2021 Red Hat, Inc.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+#ifndef COROUTINE_POOL_TIMER_H
+#define COROUTINE_POOL_TIMER_H
+
+#include "qemu/osdep.h"
+#include "block/aio.h"
+
+/**
+ * A timer that periodically resizes this thread's coroutine pool, freeing
+ * memory if there are too many unused coroutines.
+ *
+ * Threads that make heavy use of coroutines should use this. Failure to resize
+ * the coroutine pool can lead to large amounts of memory sitting idle and
+ * never being used after the first time.
+ */
+typedef struct {
+QEMUTimer *timer;
+} CoroutinePoolTimer;
+
+/* Call this before the thread runs the AioContext */
+void coroutine_pool_timer_init(CoroutinePoolTimer *pt, AioContext *ctx);
+
+/* Call this before the AioContext from

Re: [PATCH 4/7] bitops: Support 32 and 64 bit mask macro

2021-09-08 Thread Yang Zhong

On Wed, Sep 08, 2021 at 10:34:39AM +0200, Philippe Mathieu-Daudé wrote:
> On 9/8/21 10:19 AM, Yang Zhong wrote:
> > The Qemu should enable bit mask macro like Linux did in the
> > kernel, the GENMASK(h, l) and GENMASK_ULL(h, l) will set the
> > bit to 1 from l to h bit in the 32 bit or 64 bit long type.
> > 
> > Signed-off-by: Yang Zhong 
> > ---
> >  include/qemu/bitops.h | 7 +++
> >  1 file changed, 7 insertions(+)
> > 
> > diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
> > index 03213ce952..04dec60670 100644
> > --- a/include/qemu/bitops.h
> > +++ b/include/qemu/bitops.h
> > @@ -18,6 +18,7 @@
> >  
> >  #define BITS_PER_BYTE   CHAR_BIT
> >  #define BITS_PER_LONG   (sizeof (unsigned long) * BITS_PER_BYTE)
> > +#define BITS_PER_LONG_LONG   64
> >  
> >  #define BIT(nr) (1UL << (nr))
> >  #define BIT_ULL(nr) (1ULL << (nr))
> > @@ -28,6 +29,12 @@
> >  #define MAKE_64BIT_MASK(shift, length) \
> >  (((~0ULL) >> (64 - (length))) << (shift))
> >  
> > +#define GENMASK(h, l) \
> > +(((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h
> > +
> > +#define GENMASK_ULL(h, l) \
> > +(((~0ULL) << (l)) & (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h
> 
> What is the difference with MAKE_64BIT_MASK()??

  Philippe, thanks for comments, i will use MAKE_64BIT_MASK() to replace
  GENMASK_ULL(), and at the same time, this patch will be dropped, thanks!

  Yang

Re: [PATCH v1 2/3] io: Add zerocopy and errqueue

2021-09-08 Thread Peter Xu

On Wed, Sep 08, 2021 at 10:57:06PM +0100, Daniel P. Berrangé wrote:
> We think we're probably ok with migration as we are going to rely on the
> face that we eventually pause the guest to stop page changes during the
> final switchover. None the less I really strongly dislike the idea of
> not honouring the kernel API contract, despite the potential performance
> benefits it brings.

Yes understandable, and it does looks tricky. But it's guest page and it's just
by nature how it works to me (sending page happening in parallel with page
being modified).

I think the MSG_ZEROCOPY doc page mentioned that and it's userspace program's
own choice if that happens. So even if it's not by design and not suggested, it
seems not forbidden either.

We can wr-protect the page (using things like userfaultfd-wp) during sending
and unprotect them when it's done with a qio callback, that'll guarantee the
buffer not changing during sending, however we gain nothing besides the "api
cleaness" then..

Thanks,

-- 
Peter Xu

Re: [PATCH 4/5] ebpf_rss_helper: Added helper for eBPF RSS.

2021-09-08 Thread Jason Wang

On Thu, Sep 9, 2021 at 8:00 AM Yuri Benditovich
 wrote:
>
> On Wed, Sep 8, 2021 at 6:45 AM Jason Wang  wrote:
> >
> > On Tue, Sep 7, 2021 at 6:40 PM Yuri Benditovich
> >  wrote:
> > >
> > > On Wed, Sep 1, 2021 at 9:42 AM Jason Wang  wrote:
> > > >
> > > >
> > > > 在 2021/8/31 上午1:07, Yuri Benditovich 写道:
> > > > > On Fri, Aug 20, 2021 at 6:41 AM Jason Wang  
> > > > > wrote:
> > > > >>
> > > > >> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
> > > > >>> Helper program. Loads eBPF RSS program and maps and passes them 
> > > > >>> through unix socket.
> > > > >>> Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
> > > > >>
> > > > >> I wonder if this can be done as helper for TAP/bridge.
> > > > >>
> > > > >> E.g it's the qemu to launch those helper with set-uid.
> > > > >>
> > > > >> Then libvirt won't even need to care about that?
> > > > >>
> > > > > There are pros and cons for such a solution with set-uid.
> > > > >  From my point of view one of the cons is that set-uid is efficient
> > > > > only at install time so the coexistence of different qemu builds (and
> > > > > different helpers for each one) is kind of problematic.
> > > > > With the current solution this does not present any problem: the
> > > > > developer can have several different builds, each one automatically
> > > > > has its own helper and there is no conflict between these builds and
> > > > > between these builds and installed qemu package. Changing the
> > > > > 'emulator' in the libvirt profile automatically brings the proper
> > > > > helper to work.
> > > >
> > > >
> > > > I'm not sure I get you here. We can still have default/sample helper to
> > > > make sure it works for different builds.
> > > >
> > > > If we can avoid the involvement of libvirt, that would be better.
> > >
> > > Hi Jason,
> > >
> > > Indeed I did not get the idea, can you please explain it in more
> > > details (as detailed as possible to avoid future misunderstanding),
> > > especially how exactly we can use the set-uid and what is the 'default' 
> > > helper.
> > > We also would prefer to do everything from qemu but we do not see how
> > > we can do that.
> >
> >
> Some more questions to understand the idea better:
> > Something like:
> >
> > 1) -netdev tap,rss_helper=/path/to/name
>
> So, on each editing of 'emulator' in the xml  the helper path should
> be set manually or be default?

It could done manually, or we can have a default path.

>
> > 2) having a sample/default helper implemented in Qemu
>
> Does it mean the default helper is the code in the qemu (without
> running additional executable, like it does today)

Yes.

 or this is qemu
> itself with dedicated command line?
> As far as I remember Daniel had strong objections of ever running qemu
> with capabilities

Qemu won't run with capabilities but the helper.

>
> > 3) we can introduce something special path like "default", then if
> > -netdev tap,rss_helper="default" is specified, qemu will use the
> > sample helper
>
> Probably this is not so important but the rss helper and rss in
> general has no relation to netdev, much more they are related to
> virtio-net

So I think the reason for this is that we currently only support
eBPF/RSS for tap.

>
> >
> > So we have:
> > 1) set set-uid for the helper
> Who and when does set-uid to the helper binary? Only installer or
> libvirt can do that, correct?

Yes, it could be done the installer, or other system provision tools.

>
> > 2) libvirt may just choose to launch the default helper
> All this discussion is to avoid launching the helper from libvirt, correct?

Sorry, it's a typo. I meant, libvirt launch qemu, and then qemu will
launch the helper.

Thanks

>
> >
> > >
> > > Our main points (what should be addressed):
> > > - qemu should be able to load ebpf and use the maps when it runs from
> > > libvirt (without special caps) and standalone (with caps)
> >
> > This is solved by leaving the privileged operations to the helper with 
> > set-uid.
> >
> > > - it is possible that there are different qemu builds on the machine,
> > > one of them might be installed, their ebpf's might be different and
> > > the interface between qemu and ebpf (exact content of maps and number
> > > of maps)
> >
> > We can use different helpers in this way.
> >
> > > - qemu configures the RSS dynamically according to the commands
> > > provided by the guest
> >
> > Consider we decided to use mmap() based maps, this is not an issue.
> >
> > Or am I missing something?
> >
> > Thanks
> >
> > >
> > > Thanks in advance
> > > Yuri
> > >
> > > >
> > > > Thanks
> > > >
> > > >
> > > > >
> > > > >>> Also, libbpf dependency now exclusively for Linux.
> > > > >>> Libbpf is used for eBPF RSS steering, which is supported only by 
> > > > >>> Linux TAP.
> > > > >>> There is no reason yet to build eBPF loader and helper for non 
> > > > >>> Linux systems,
> > > > >>> even if libbpf is present.
> > > > >>>
> > > > >>> Signed-off-by: Andrew Melnychenko 
> > > > >>> ---
>

Re: [PATCH v1 3/3] migration: multifd: Enable zerocopy

2021-09-08 Thread Jason Wang

On Wed, Sep 8, 2021 at 11:19 PM Peter Xu  wrote:
>
> On Wed, Sep 08, 2021 at 09:19:27AM +0100, Dr. David Alan Gilbert wrote:
> > * Jason Wang (jasow...@redhat.com) wrote:
> > > On Wed, Sep 8, 2021 at 2:32 AM Peter Xu  wrote:
> > > >
> > > > On Thu, Sep 02, 2021 at 04:22:55AM -0300, Leonardo Bras Soares Passos 
> > > > wrote:
> > > > > > I don't think it is valid to unconditionally enable this feature 
> > > > > > due to the
> > > > > > resource usage implications
> > > > > >
> > > > > > https://www.kernel.org/doc/html/v5.4/networking/msg_zerocopy.html
> > > > > >
> > > > > >   "A zerocopy failure will return -1 with errno ENOBUFS. This 
> > > > > > happens
> > > > > >if the socket option was not set, the socket exceeds its optmem
> > > > > >limit or the user exceeds its ulimit on locked pages."
> > > > >
> > > > > You are correct, I unfortunately missed this part in the docs :(
> > > > >
> > > > > > The limit on locked pages is something that looks very likely to be
> > > > > > exceeded unless you happen to be running a QEMU config that already
> > > > > > implies locked memory (eg PCI assignment)
> > > > >
> > > > > Do you mean the limit an user has on locking memory?
> > > > >
> > > > > If so, that makes sense. I remember I needed to set the upper limit 
> > > > > of locked
> > > > > memory for the user before using it, or adding a capability to qemu 
> > > > > before.
> > > >
> > > > So I'm a bit confused on why MSG_ZEROCOPY requires checking 
> > > > RLIMIT_MEMLOCK.
> > > >
> > > > The thing is IIUC that's accounting for pinned pages only with either 
> > > > mlock()
> > > > (FOLL_MLOCK) or vfio (FOLL_PIN).
> > > >
> > > > I don't really think MSG_ZEROCOPY is doing that at all...  I'm looking 
> > > > at
> > > > __zerocopy_sg_from_iter() -> iov_iter_get_pages().
> > >
> > > It happens probably here:
> > >
> > > E.g
> > >
> > > __ip_append_data()
> > > msg_zerocopy_realloc()
> > > mm_account_pinned_pages()
> >
> > When do they get uncounted?  i.e. is it just the data that's in flight
> > that's marked as pinned?
>
> I think so - there's __msg_zerocopy_callback() -> mm_unaccount_pinned_pages()
> correspondingly.  Thanks,

Yes, and actually the memory that could be pinned is limited by the
sndbuf of TCP socket. So we are fine with rlimit (e.g we don't need to
pin all guest pages).

Thanks

>
> --
> Peter Xu
>

Re: [PATCH 4/5] ebpf_rss_helper: Added helper for eBPF RSS.

2021-09-08 Thread Yuri Benditovich

On Wed, Sep 8, 2021 at 6:45 AM Jason Wang  wrote:
>
> On Tue, Sep 7, 2021 at 6:40 PM Yuri Benditovich
>  wrote:
> >
> > On Wed, Sep 1, 2021 at 9:42 AM Jason Wang  wrote:
> > >
> > >
> > > 在 2021/8/31 上午1:07, Yuri Benditovich 写道:
> > > > On Fri, Aug 20, 2021 at 6:41 AM Jason Wang  wrote:
> > > >>
> > > >> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
> > > >>> Helper program. Loads eBPF RSS program and maps and passes them 
> > > >>> through unix socket.
> > > >>> Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
> > > >>
> > > >> I wonder if this can be done as helper for TAP/bridge.
> > > >>
> > > >> E.g it's the qemu to launch those helper with set-uid.
> > > >>
> > > >> Then libvirt won't even need to care about that?
> > > >>
> > > > There are pros and cons for such a solution with set-uid.
> > > >  From my point of view one of the cons is that set-uid is efficient
> > > > only at install time so the coexistence of different qemu builds (and
> > > > different helpers for each one) is kind of problematic.
> > > > With the current solution this does not present any problem: the
> > > > developer can have several different builds, each one automatically
> > > > has its own helper and there is no conflict between these builds and
> > > > between these builds and installed qemu package. Changing the
> > > > 'emulator' in the libvirt profile automatically brings the proper
> > > > helper to work.
> > >
> > >
> > > I'm not sure I get you here. We can still have default/sample helper to
> > > make sure it works for different builds.
> > >
> > > If we can avoid the involvement of libvirt, that would be better.
> >
> > Hi Jason,
> >
> > Indeed I did not get the idea, can you please explain it in more
> > details (as detailed as possible to avoid future misunderstanding),
> > especially how exactly we can use the set-uid and what is the 'default' 
> > helper.
> > We also would prefer to do everything from qemu but we do not see how
> > we can do that.
>
>
Some more questions to understand the idea better:
> Something like:
>
> 1) -netdev tap,rss_helper=/path/to/name

So, on each editing of 'emulator' in the xml  the helper path should
be set manually or be default?

> 2) having a sample/default helper implemented in Qemu

Does it mean the default helper is the code in the qemu (without
running additional executable, like it does today) or this is qemu
itself with dedicated command line?
As far as I remember Daniel had strong objections of ever running qemu
with capabilities

> 3) we can introduce something special path like "default", then if
> -netdev tap,rss_helper="default" is specified, qemu will use the
> sample helper

Probably this is not so important but the rss helper and rss in
general has no relation to netdev, much more they are related to
virtio-net

>
> So we have:
> 1) set set-uid for the helper
Who and when does set-uid to the helper binary? Only installer or
libvirt can do that, correct?

> 2) libvirt may just choose to launch the default helper
All this discussion is to avoid launching the helper from libvirt, correct?

>
> >
> > Our main points (what should be addressed):
> > - qemu should be able to load ebpf and use the maps when it runs from
> > libvirt (without special caps) and standalone (with caps)
>
> This is solved by leaving the privileged operations to the helper with 
> set-uid.
>
> > - it is possible that there are different qemu builds on the machine,
> > one of them might be installed, their ebpf's might be different and
> > the interface between qemu and ebpf (exact content of maps and number
> > of maps)
>
> We can use different helpers in this way.
>
> > - qemu configures the RSS dynamically according to the commands
> > provided by the guest
>
> Consider we decided to use mmap() based maps, this is not an issue.
>
> Or am I missing something?
>
> Thanks
>
> >
> > Thanks in advance
> > Yuri
> >
> > >
> > > Thanks
> > >
> > >
> > > >
> > > >>> Also, libbpf dependency now exclusively for Linux.
> > > >>> Libbpf is used for eBPF RSS steering, which is supported only by 
> > > >>> Linux TAP.
> > > >>> There is no reason yet to build eBPF loader and helper for non Linux 
> > > >>> systems,
> > > >>> even if libbpf is present.
> > > >>>
> > > >>> Signed-off-by: Andrew Melnychenko 
> > > >>> ---
> > > >>>ebpf/qemu-ebpf-rss-helper.c | 130 
> > > >>> 
> > > >>>meson.build |  37 ++
> > > >>>2 files changed, 154 insertions(+), 13 deletions(-)
> > > >>>create mode 100644 ebpf/qemu-ebpf-rss-helper.c
> > > >>>
> > > >>> diff --git a/ebpf/qemu-ebpf-rss-helper.c b/ebpf/qemu-ebpf-rss-helper.c
> > > >>> new file mode 100644
> > > >>> index 00..fe68758f57
> > > >>> --- /dev/null
> > > >>> +++ b/ebpf/qemu-ebpf-rss-helper.c
> > > >>> @@ -0,0 +1,130 @@
> > > >>> +/*
> > > >>> + * eBPF RSS Helper
> > > >>> + *
> > > >>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> > > >>> + *
> >

Re: [PATCH 1/2] elf2dmp: Check curl_easy_setopt() return value

2021-09-08 Thread Philippe Mathieu-Daudé

On 9/8/21 11:43 PM, Viktor Prutyanov wrote:
> On Wed, 1 Sep 2021 17:25:09 +0200
> Philippe Mathieu-Daudé  wrote:
> 
>> On 9/1/21 4:39 PM, Peter Maydell wrote:
>>> Coverity points out that we aren't checking the return value
>>> from curl_easy_setopt().
>>>
>>> Fixes: Coverity CID 1458895
>>> Signed-off-by: Peter Maydell 
>>> ---
>>>  contrib/elf2dmp/download.c | 28 +---
>>>  1 file changed, 17 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/contrib/elf2dmp/download.c b/contrib/elf2dmp/download.c
>>> index d09e607431f..01e4a7fc0dc 100644
>>> --- a/contrib/elf2dmp/download.c
>>> +++ b/contrib/elf2dmp/download.c
>>> @@ -21,21 +21,19 @@ int download_url(const char *name, const char
>>> *url) 
>>>  file = fopen(name, "wb");
>>>  if (!file) {
>>> -err = 1;
>>> -goto out_curl;
>>> +goto fail;
>>>  }
>>>  
>>> -curl_easy_setopt(curl, CURLOPT_URL, url);
>>> -curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL);
>>> -curl_easy_setopt(curl, CURLOPT_WRITEDATA, file);
>>> -curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
>>> -curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0);
>>> +if (curl_easy_setopt(curl, CURLOPT_URL, url) != CURLE_OK ||
>>> +curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL) !=
>>> CURLE_OK ||
>>> +curl_easy_setopt(curl, CURLOPT_WRITEDATA, file) !=
>>> CURLE_OK ||
>>> +curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1) !=
>>> CURLE_OK ||
>>> +curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0) != CURLE_OK)
>>> {
>>> +goto fail;
>>> +}
>>>  
>>>  if (curl_easy_perform(curl) != CURLE_OK) {
>>> -err = 1;
>>> -fclose(file);
>>> -unlink(name);
>>> -goto out_curl;
>>> +goto fail;
>>>  }
>>>  
>>>  err = fclose(file);
>>> @@ -44,4 +42,12 @@ out_curl:
>>>  curl_easy_cleanup(curl);
>>>  
>>>  return err;
>>> +
>>> +fail:
>>> +err = 1;
>>> +if (file) {
>>> +fclose(file);
>>> +unlink(name);
>>> +}
>>> +goto out_curl;
>>>  }
>>>   
>>
>> Counter proposal without goto and less ifs:
>>
>> -- >8 --  
>> @@ -25,21 +25,19 @@ int download_url(const char *name, const char
>> *url) goto out_curl;
>>  }
>>
>> -curl_easy_setopt(curl, CURLOPT_URL, url);
>> -curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL);
>> -curl_easy_setopt(curl, CURLOPT_WRITEDATA, file);
>> -curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
>> -curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0);
>> -
>> -if (curl_easy_perform(curl) != CURLE_OK) {
>> -err = 1;
>> -fclose(file);
>> +if (curl_easy_setopt(curl, CURLOPT_URL, url) != CURLE_OK
>> +|| curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL) !=
>> CURLE_OK
>> +|| curl_easy_setopt(curl, CURLOPT_WRITEDATA, file) !=
>> CURLE_OK
>> +|| curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1) !=
>> CURLE_OK
>> +|| curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0) !=
>> CURLE_OK
>> +|| curl_easy_perform(curl) != CURLE_OK) {
>>  unlink(name);
>> -goto out_curl;
>> +fclose(file);
>> +err = 1;
>> +} else {
>> +err = fclose(file);
>>  }
>>
>> -err = fclose(file);
>> -
>>  out_curl:
>>  curl_easy_cleanup(curl);
>>
>> ---
>>
> 
> Honestly, I would prefer this version over the original patch.
> In any way, I have tested both of them.

OK I will post this properly and let Peter pick whichever he
prefers. Do you mind to reply to the cover letter with a
"Tested-by: Viktor Prutyanov "
tag?

[RFC PATCH 10/10] hw/sd: Mark sdhci-pci device as unsafe

2021-09-08 Thread Philippe Mathieu-Daudé

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/sd/sdhci-pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/sd/sdhci-pci.c b/hw/sd/sdhci-pci.c
index c737c8b930e..7a36f88fd87 100644
--- a/hw/sd/sdhci-pci.c
+++ b/hw/sd/sdhci-pci.c
@@ -64,6 +64,7 @@ static void sdhci_pci_class_init(ObjectClass *klass, void 
*data)
 k->device_id = PCI_DEVICE_ID_REDHAT_SDHCI;
 k->class_id = PCI_CLASS_SYSTEM_SDHCI;
 device_class_set_props(dc, sdhci_pci_properties);
+dc->taints_security_policy = true;
 
 sdhci_common_class_init(klass, data);
 }
-- 
2.31.1

[RFC PATCH 06/10] qdev: Use qemu_security_policy_taint() API

2021-09-08 Thread Philippe Mathieu-Daudé

Add DeviceClass::taints_security_policy field to allow an
unsafe device to eventually taint the global security policy
in DeviceRealize().

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/hw/qdev-core.h |  6 ++
 hw/core/qdev.c | 11 +++
 2 files changed, 17 insertions(+)

diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index bafc311bfa1..ff9ce6671be 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -122,6 +122,12 @@ struct DeviceClass {
  */
 bool user_creatable;
 bool hotpluggable;
+/*
+ * %false if the device is within the QEMU security policy boundary,
+ * %true if there is no guarantee this device can be used safely.
+ * See: https://www.qemu.org/contribute/security-process/
+ */
+bool taints_security_policy;
 
 /* callbacks */
 /*
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index cefc5eaa0a9..a5a00f3564c 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -31,6 +31,7 @@
 #include "qapi/qmp/qerror.h"
 #include "qapi/visitor.h"
 #include "qemu/error-report.h"
+#include "qemu-common.h"
 #include "qemu/option.h"
 #include "hw/hotplug.h"
 #include "hw/irq.h"
@@ -257,6 +258,13 @@ bool qdev_hotplug_allowed(DeviceState *dev, Error **errp)
 MachineClass *mc;
 Object *m_obj = qdev_get_machine();
 
+if (qemu_security_policy_is_strict()
+&& DEVICE_GET_CLASS(dev)->taints_security_policy) {
+error_setg(errp, "Device '%s' can not be hotplugged when"
+ " 'strict' security policy is in place",
+   object_get_typename(OBJECT(dev)));
+}
+
 if (object_dynamic_cast(m_obj, TYPE_MACHINE)) {
 machine = MACHINE(m_obj);
 mc = MACHINE_GET_CLASS(machine);
@@ -385,6 +393,9 @@ bool qdev_realize(DeviceState *dev, BusState *bus, Error 
**errp)
 } else {
 assert(!DEVICE_GET_CLASS(dev)->bus_type);
 }
+qemu_security_policy_taint(DEVICE_GET_CLASS(dev)->taints_security_policy,
+   "device type %s",
+   object_get_typename(OBJECT(dev)));
 
 return object_property_set_bool(OBJECT(dev), "realized", true, errp);
 }
-- 
2.31.1

[RFC PATCH 07/10] hw/display: Mark ATI and Artist devices as unsafe

2021-09-08 Thread Philippe Mathieu-Daudé

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/display/artist.c | 1 +
 hw/display/ati.c| 1 +
 2 files changed, 2 insertions(+)

diff --git a/hw/display/artist.c b/hw/display/artist.c
index 21b7fd1b440..067a4b2cb59 100644
--- a/hw/display/artist.c
+++ b/hw/display/artist.c
@@ -1482,6 +1482,7 @@ static void artist_class_init(ObjectClass *klass, void 
*data)
 dc->vmsd = _artist;
 dc->reset = artist_reset;
 device_class_set_props(dc, artist_properties);
+dc->taints_security_policy = true;
 }
 
 static const TypeInfo artist_info = {
diff --git a/hw/display/ati.c b/hw/display/ati.c
index 31f22754dce..2f27ab69a87 100644
--- a/hw/display/ati.c
+++ b/hw/display/ati.c
@@ -1024,6 +1024,7 @@ static void ati_vga_class_init(ObjectClass *klass, void 
*data)
 device_class_set_props(dc, ati_vga_properties);
 dc->hotpluggable = false;
 set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
+dc->taints_security_policy = true;
 
 k->class_id = PCI_CLASS_DISPLAY_VGA;
 k->vendor_id = PCI_VENDOR_ID_ATI;
-- 
2.31.1

[RFC PATCH 09/10] hw/net: Mark Tulip device as unsafe

2021-09-08 Thread Philippe Mathieu-Daudé

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/net/tulip.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/net/tulip.c b/hw/net/tulip.c
index ca69f7ea5e1..eaad3266212 100644
--- a/hw/net/tulip.c
+++ b/hw/net/tulip.c
@@ -1025,6 +1025,7 @@ static void tulip_class_init(ObjectClass *klass, void 
*data)
 device_class_set_props(dc, tulip_properties);
 dc->reset = tulip_qdev_reset;
 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
+dc->taints_security_policy = true;
 }
 
 static const TypeInfo tulip_info = {
-- 
2.31.1

[RFC PATCH 04/10] block/vvfat: Mark the driver as unsafe

2021-09-08 Thread Philippe Mathieu-Daudé

While being listed as 'supported' in MAINTAINERS, this driver
does not have many reviewers and contains various /* TODO */
unattended since various years. Not safe enough for production
environment, so have it taint the global security policy.

Signed-off-by: Philippe Mathieu-Daudé 
---
 block/vvfat.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/block/vvfat.c b/block/vvfat.c
index 34bf1e3a86e..993e40727d6 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -3199,6 +3199,11 @@ static void vvfat_close(BlockDriverState *bs)
 }
 }
 
+static bool vvfat_taints_security_policy(BlockDriverState *bs)
+{
+return true;
+}
+
 static const char *const vvfat_strong_runtime_opts[] = {
 "dir",
 "fat-type",
@@ -3219,6 +3224,7 @@ static BlockDriver bdrv_vvfat = {
 .bdrv_refresh_limits= vvfat_refresh_limits,
 .bdrv_close = vvfat_close,
 .bdrv_child_perm= vvfat_child_perm,
+.bdrv_taints_security_policy = vvfat_taints_security_policy,
 
 .bdrv_co_preadv = vvfat_co_preadv,
 .bdrv_co_pwritev= vvfat_co_pwritev,
-- 
2.31.1

[RFC PATCH 03/10] block: Use qemu_security_policy_taint() API

2021-09-08 Thread Philippe Mathieu-Daudé

Add the BlockDriver::bdrv_taints_security_policy() handler.
Drivers implementing it might taint the global QEMU security
policy.

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/block/block_int.h | 6 +-
 block.c   | 6 ++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index f1a54db0f8c..0ec0a5c06e9 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -169,7 +169,11 @@ struct BlockDriver {
 int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
   Error **errp);
 void (*bdrv_close)(BlockDriverState *bs);
-
+/*
+ * Return %true if the driver is withing QEMU security policy boundary,
+ * %false otherwise. See: https://www.qemu.org/contribute/security-process/
+ */
+bool (*bdrv_taints_security_policy)(BlockDriverState *bs);
 
 int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts,
Error **errp);
diff --git a/block.c b/block.c
index b2b66263f9a..696ba486001 100644
--- a/block.c
+++ b/block.c
@@ -49,6 +49,7 @@
 #include "qemu/timer.h"
 #include "qemu/cutils.h"
 #include "qemu/id.h"
+#include "qemu-common.h"
 #include "block/coroutines.h"
 
 #ifdef CONFIG_BSD
@@ -1587,6 +1588,11 @@ static int bdrv_open_driver(BlockDriverState *bs, 
BlockDriver *drv,
 }
 }
 
+if (drv->bdrv_taints_security_policy) {
+qemu_security_policy_taint(drv->bdrv_taints_security_policy(bs),
+   "Block protocol '%s'", drv->format_name);
+}
+
 return 0;
 open_failed:
 bs->drv = NULL;
-- 
2.31.1

[RFC PATCH 05/10] block/null: Mark 'read-zeroes=off' option as unsafe

2021-09-08 Thread Philippe Mathieu-Daudé

See commit b317006a3f1 ("docs/secure-coding-practices: Describe how
to use 'null-co' block driver") for rationale.

Signed-off-by: Philippe Mathieu-Daudé 
---
 block/null.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/block/null.c b/block/null.c
index cc9b1d4ea72..11e428f3cc2 100644
--- a/block/null.c
+++ b/block/null.c
@@ -99,6 +99,13 @@ static int null_file_open(BlockDriverState *bs, QDict 
*options, int flags,
 return ret;
 }
 
+static bool null_taints_security_policy(BlockDriverState *bs)
+{
+BDRVNullState *s = bs->opaque;
+
+return !s->read_zeroes;
+}
+
 static int64_t null_getlength(BlockDriverState *bs)
 {
 BDRVNullState *s = bs->opaque;
@@ -283,6 +290,7 @@ static BlockDriver bdrv_null_co = {
 .bdrv_parse_filename= null_co_parse_filename,
 .bdrv_getlength = null_getlength,
 .bdrv_get_allocated_file_size = null_allocated_file_size,
+.bdrv_taints_security_policy = null_taints_security_policy,
 
 .bdrv_co_preadv = null_co_preadv,
 .bdrv_co_pwritev= null_co_pwritev,
-- 
2.31.1

[RFC PATCH 08/10] hw/misc: Mark testdev devices as unsafe

2021-09-08 Thread Philippe Mathieu-Daudé

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/hyperv/hyperv_testdev.c | 1 +
 hw/misc/pc-testdev.c   | 1 +
 hw/misc/pci-testdev.c  | 1 +
 3 files changed, 3 insertions(+)

diff --git a/hw/hyperv/hyperv_testdev.c b/hw/hyperv/hyperv_testdev.c
index 9a56ddf83fe..6a75c350389 100644
--- a/hw/hyperv/hyperv_testdev.c
+++ b/hw/hyperv/hyperv_testdev.c
@@ -310,6 +310,7 @@ static void hv_test_dev_class_init(ObjectClass *klass, void 
*data)
 
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 dc->realize = hv_test_dev_realizefn;
+dc->taints_security_policy = true;
 }
 
 static const TypeInfo hv_test_dev_info = {
diff --git a/hw/misc/pc-testdev.c b/hw/misc/pc-testdev.c
index e3896518694..6294b80ec1b 100644
--- a/hw/misc/pc-testdev.c
+++ b/hw/misc/pc-testdev.c
@@ -199,6 +199,7 @@ static void testdev_class_init(ObjectClass *klass, void 
*data)
 
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 dc->realize = testdev_realizefn;
+dc->taints_security_policy = true;
 }
 
 static const TypeInfo testdev_info = {
diff --git a/hw/misc/pci-testdev.c b/hw/misc/pci-testdev.c
index 03845c8de34..189eb9bf1bb 100644
--- a/hw/misc/pci-testdev.c
+++ b/hw/misc/pci-testdev.c
@@ -340,6 +340,7 @@ static void pci_testdev_class_init(ObjectClass *klass, void 
*data)
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 dc->reset = qdev_pci_testdev_reset;
 device_class_set_props(dc, pci_testdev_properties);
+dc->taints_security_policy = true;
 }
 
 static const TypeInfo pci_testdev_info = {
-- 
2.31.1

[RFC PATCH 02/10] accel: Use qemu_security_policy_taint(), mark KVM and Xen as safe

2021-09-08 Thread Philippe Mathieu-Daudé

Add the AccelClass::secure_policy_supported field to classify
safe (within security boundary) vs unsafe accelerators.

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/qemu/accel.h | 5 +
 accel/kvm/kvm-all.c  | 1 +
 accel/xen/xen-all.c  | 1 +
 softmmu/vl.c | 3 +++
 4 files changed, 10 insertions(+)

diff --git a/include/qemu/accel.h b/include/qemu/accel.h
index 4f4c283f6fc..895e30be0de 100644
--- a/include/qemu/accel.h
+++ b/include/qemu/accel.h
@@ -44,6 +44,11 @@ typedef struct AccelClass {
hwaddr start_addr, hwaddr size);
 #endif
 bool *allowed;
+/*
+ * Whether the accelerator is withing QEMU security policy boundary.
+ * See: https://www.qemu.org/contribute/security-process/
+ */
+bool secure_policy_supported;
 /*
  * Array of global properties that would be applied when specific
  * accelerator is chosen. It works like MachineClass.compat_props
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 0125c17edb8..eb6b9e44df2 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -3623,6 +3623,7 @@ static void kvm_accel_class_init(ObjectClass *oc, void 
*data)
 ac->init_machine = kvm_init;
 ac->has_memory = kvm_accel_has_memory;
 ac->allowed = _allowed;
+ac->secure_policy_supported = true;
 
 object_class_property_add(oc, "kernel-irqchip", "on|off|split",
 NULL, kvm_set_kernel_irqchip,
diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c
index 69aa7d018b2..57867af5faf 100644
--- a/accel/xen/xen-all.c
+++ b/accel/xen/xen-all.c
@@ -198,6 +198,7 @@ static void xen_accel_class_init(ObjectClass *oc, void 
*data)
 ac->setup_post = xen_setup_post;
 ac->allowed = _allowed;
 ac->compat_props = g_ptr_array_new();
+ac->secure_policy_supported = true;
 
 compat_props_add(ac->compat_props, compat, G_N_ELEMENTS(compat));
 
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 92c05ac97ee..e4f94e159c3 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2388,6 +2388,9 @@ static int do_configure_accelerator(void *opaque, 
QemuOpts *opts, Error **errp)
 return 0;
 }
 
+qemu_security_policy_taint(!ac->secure_policy_supported,
+   "%s accelerator", acc);
+
 return 1;
 }
 
-- 
2.31.1

[RFC PATCH 00/10] security: Introduce qemu_security_policy_taint() API

2021-09-08 Thread Philippe Mathieu-Daudé

Hi,

This series is experimental! The goal is to better limit the
boundary of what code is considerated security critical, and
what is less critical (but still important!).

This approach was quickly discussed few months ago with Markus
then Daniel. Instead of classifying the code on a file path
basis (see [1]), we insert (runtime) hints into the code
(which survive code movement). Offending unsafe code can
taint the global security policy. By default this policy
is 'none': the current behavior. It can be changed on the
command line to 'warn' to display warnings, and to 'strict'
to prohibit QEMU running with a tainted policy.

As examples I started implementing unsafe code taint from
3 different pieces of code:
- accelerators (KVM and Xen in allow-list)
- block drivers (vvfat and parcial null-co in deny-list)
- qdev (hobbyist devices regularly hit by fuzzer)

I don't want the security researchers to not fuzz QEMU unsafe
areas, but I'd like to make it clearer what the community
priority is (currently 47 opened issues on [3]).

Regards,

Phil.

[1] 
https://lore.kernel.org/qemu-devel/20200714083631.888605-2-ppan...@redhat.com/
[2] https://www.qemu.org/contribute/security-process/
[3] https://gitlab.com/qemu-project/qemu/-/issues?label_name[]=Fuzzer

Philippe Mathieu-Daudé (10):
  sysemu: Introduce qemu_security_policy_taint() API
  accel: Use qemu_security_policy_taint(), mark KVM and Xen as safe
  block: Use qemu_security_policy_taint() API
  block/vvfat: Mark the driver as unsafe
  block/null: Mark 'read-zeroes=off' option as unsafe
  qdev: Use qemu_security_policy_taint() API
  hw/display: Mark ATI and Artist devices as unsafe
  hw/misc: Mark testdev devices as unsafe
  hw/net: Mark Tulip device as unsafe
  hw/sd: Mark sdhci-pci device as unsafe

 qapi/run-state.json| 16 +
 include/block/block_int.h  |  6 +++-
 include/hw/qdev-core.h |  6 
 include/qemu-common.h  | 19 +++
 include/qemu/accel.h   |  5 +++
 accel/kvm/kvm-all.c|  1 +
 accel/xen/xen-all.c|  1 +
 block.c|  6 
 block/null.c   |  8 +
 block/vvfat.c  |  6 
 hw/core/qdev.c | 11 ++
 hw/display/artist.c|  1 +
 hw/display/ati.c   |  1 +
 hw/hyperv/hyperv_testdev.c |  1 +
 hw/misc/pc-testdev.c   |  1 +
 hw/misc/pci-testdev.c  |  1 +
 hw/net/tulip.c |  1 +
 hw/sd/sdhci-pci.c  |  1 +
 softmmu/vl.c   | 70 ++
 qemu-options.hx| 17 +
 20 files changed, 178 insertions(+), 1 deletion(-)

-- 
2.31.1

[RFC PATCH 01/10] sysemu: Introduce qemu_security_policy_taint() API

2021-09-08 Thread Philippe Mathieu-Daudé

Introduce qemu_security_policy_taint() which allows unsafe (read
"not very maintained") code to 'taint' QEMU security policy.

The "security policy" is the @SecurityPolicy QAPI enum, composed of:
- "none"   (no policy, current behavior)
- "warn"   (display a warning when the policy is tainted, keep going)
- "strict" (once tainted, exit QEMU before starting the VM)

The qemu_security_policy_is_strict() helper is also provided, which
will be proved useful once a VM is started (example we do not want
to kill a running VM if an unsafe device is hot-added).

Signed-off-by: Philippe Mathieu-Daudé 
---
 qapi/run-state.json   | 16 +++
 include/qemu-common.h | 19 
 softmmu/vl.c  | 67 +++
 qemu-options.hx   | 17 +++
 4 files changed, 119 insertions(+)

diff --git a/qapi/run-state.json b/qapi/run-state.json
index 43d66d700fc..b15a107fa01 100644
--- a/qapi/run-state.json
+++ b/qapi/run-state.json
@@ -638,3 +638,19 @@
 { 'struct': 'MemoryFailureFlags',
   'data': { 'action-required': 'bool',
 'recursive': 'bool'} }
+
+##
+# @SecurityPolicy:
+#
+# An enumeration of the actions taken when the security policy is tainted.
+#
+# @none: do nothing.
+#
+# @warn: display a warning.
+#
+# @strict: prohibit QEMU to start a VM.
+#
+# Since: 6.2
+##
+{ 'enum': 'SecurityPolicy',
+  'data': [ 'none', 'warn', 'strict' ] }
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 73bcf763ed8..bf0b054bb66 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -139,4 +139,23 @@ void page_size_init(void);
  * returned. */
 bool dump_in_progress(void);
 
+/**
+ * qemu_security_policy_taint:
+ * @tainting whether any security policy is tainted (compromised).
+ * @fmt: taint reason format string
+ * ...: list of arguments to interpolate into @fmt, like printf().
+ *
+ * Allow unsafe code path to taint the global security policy.
+ * See #SecurityPolicy.
+ */
+void qemu_security_policy_taint(bool tainting, const char *fmt, ...)
+GCC_FMT_ATTR(2, 3);
+
+/**
+ * qemu_security_policy_is_strict:
+ *
+ * Return %true if the global security policy is 'strict', %false otherwise.
+ */
+bool qemu_security_policy_is_strict(void);
+
 #endif
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 55ab70eb97f..92c05ac97ee 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -489,6 +489,20 @@ static QemuOptsList qemu_action_opts = {
 },
 };
 
+static QemuOptsList qemu_security_policy_opts = {
+.name = "security-policy",
+.implied_opt_name = "policy",
+.merge_lists = true,
+.head = QTAILQ_HEAD_INITIALIZER(qemu_security_policy_opts.head),
+.desc = {
+{
+.name = "policy",
+.type = QEMU_OPT_STRING,
+},
+{ /* end of list */ }
+},
+};
+
 const char *qemu_get_vm_name(void)
 {
 return qemu_name;
@@ -600,6 +614,52 @@ static int cleanup_add_fd(void *opaque, QemuOpts *opts, 
Error **errp)
 }
 #endif
 
+static SecurityPolicy security_policy = SECURITY_POLICY_NONE;
+
+bool qemu_security_policy_is_strict(void)
+{
+return security_policy == SECURITY_POLICY_STRICT;
+}
+
+static int select_security_policy(const char *p)
+{
+int policy;
+char *qapi_value;
+
+qapi_value = g_ascii_strdown(p, -1);
+policy = qapi_enum_parse(_lookup, qapi_value, -1, NULL);
+g_free(qapi_value);
+if (policy < 0) {
+return -1;
+}
+security_policy = policy;
+
+return 0;
+}
+
+void qemu_security_policy_taint(bool tainting, const char *fmt, ...)
+{
+va_list ap;
+g_autofree char *efmt = NULL;
+
+if (security_policy == SECURITY_POLICY_NONE || !tainting) {
+return;
+}
+
+va_start(ap, fmt);
+if (security_policy == SECURITY_POLICY_STRICT) {
+efmt = g_strdup_printf("%s taints QEMU security policy, exiting.", 
fmt);
+error_vreport(efmt, ap);
+exit(EXIT_FAILURE);
+} else if (security_policy == SECURITY_POLICY_WARN) {
+efmt = g_strdup_printf("%s taints QEMU security policy.", fmt);
+warn_vreport(efmt, ap);
+} else {
+g_assert_not_reached();
+}
+va_end(ap);
+}
+
 /***/
 /* QEMU Block devices */
 
@@ -2764,6 +2824,7 @@ void qemu_init(int argc, char **argv, char **envp)
 qemu_add_opts(_semihosting_config_opts);
 qemu_add_opts(_fw_cfg_opts);
 qemu_add_opts(_action_opts);
+qemu_add_opts(_security_policy_opts);
 module_call_init(MODULE_INIT_OPTS);
 
 error_init(argv[0]);
@@ -3230,6 +3291,12 @@ void qemu_init(int argc, char **argv, char **envp)
 exit(1);
 }
 break;
+case QEMU_OPTION_security_policy:
+if (select_security_policy(optarg) == -1) {
+error_report("unknown -security-policy parameter");
+exit(1);
+}
+break;
 case QEMU_OPTION_parallel:

Re: [PATCH 0/3] qapi: static typing conversion, pt5c

2021-09-08 Thread John Snow

On Wed, Sep 8, 2021, 9:42 AM Markus Armbruster  wrote:

> Needs a rebase now.  Let's finish discussing my review of pt5b [v2]
> first.  Pending patches to expr.py should have made it to master by
> then.  If you're impatient, suggest to base on master + "[PATCH 0/5]
> qapi: Another round of minor fixes and cleanups" + pt5b [v2].
>

Not that impatient. Will focus on seeing 5b through the gates first.

My plan is to get the AQMP stuff merged and CI settled, then return focus
to you. Shouldn't take long.

--js

(ps: trying to answer mails from my phone for $reasons. If my formatting is
garbled and stupid, please just yell at me and I'll figure out how to fix
it. Thanks!)

>

[PATCH] hw/i386/acpi-build: Fix a typo

2021-09-08 Thread Philippe Mathieu-Daudé

Fix 'hotplugabble' -> 'hotpluggabble' typo.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/i386/acpi-build.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index d1f5fa3b5a5..478263e12c9 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1916,7 +1916,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
MachineState *machine)
 PCMachineState *pcms = PC_MACHINE(machine);
 int nb_numa_nodes = machine->numa_state->num_nodes;
 NodeInfo *numa_info = machine->numa_state->nodes;
-ram_addr_t hotplugabble_address_space_size =
+ram_addr_t hotpluggabble_address_space_size =
 object_property_get_int(OBJECT(pcms), PC_MACHINE_DEVMEM_REGION_SIZE,
 NULL);
 
@@ -2022,10 +2022,10 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
MachineState *machine)
  * Memory devices may override proximity set by this entry,
  * providing _PXM method if necessary.
  */
-if (hotplugabble_address_space_size) {
+if (hotpluggabble_address_space_size) {
 numamem = acpi_data_push(table_data, sizeof *numamem);
 build_srat_memory(numamem, machine->device_memory->base,
-  hotplugabble_address_space_size, nb_numa_nodes - 1,
+  hotpluggabble_address_space_size, nb_numa_nodes - 1,
   MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
 }
 
-- 
2.31.1

Re: Compatibility between -device sga and -machine graphics=off

2021-09-08 Thread Daniel P . Berrangé

On Wed, Sep 08, 2021 at 10:50:53PM +0200, Gerd Hoffmann wrote:
> On Wed, Sep 08, 2021 at 05:08:08PM +0100, Daniel P. Berrangé wrote:
> > Given the libvirt XML snippet
> > 
> >   
> > ...
> > 
> > ...
> >   
> > 
> > libvirt QEMU driver will always format
> > 
> >   -device sga
> > 
> > Libguestfs uses this syntax, so we need to make sure it still works
> > in future even if 'sga' is disabled or removed in a QEMU build in
> > favour of SeaBIOS' built-in support.
> 
> Just replacing '-device sga' with '-machine graphics=off' in case sga is
> not available should work fine.
> 
> serial console support in seabios is available for quite a while
> (merged in 2017, seabios 1.11 in rhel-7 has it), so switching over
> unconditionally is possibly an option too.  Not sure what the libvirt
> backward compatibility policy is though.

libvirt currently targets QEMU 2.11.0 and newer.

Fortunately it appears that QEMU 2.11.0 included SeaBIOS 1.11.0,
so we can thus eliminate usage of 'sga' from libvirt entirely,
provided I can convince myself migration is safe, which looks
probable.

> > On non-x86 emulators I see graphics=off has semantic effects beyond
> > just controlling whether the firmware prints to the serial or not
> > though.
> 
> It's been a while, but yes, IIRC on ppc this is passed to the linux
> kernel somehow (device tree?) so it also affects the default console
> device used by linux.
> 
> But sgabios is x86-only anyway so that should not be a problem here.

libvirt has never validated the arch when enabling sga, but clearly
it can't ever have been usable on non-x86. So I'll probably restrict
the usage of graphics=off to only those arches where I can validate
it something useful for the firmware output. 


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH 3/5] target/i386: convert to use format_state instead of dump_state

2021-09-08 Thread Daniel P . Berrangé

On Wed, Sep 08, 2021 at 01:05:13PM -0500, Eric Blake wrote:
> On Wed, Sep 08, 2021 at 11:37:09AM +0100, Daniel P. Berrangé wrote:
> > Signed-off-by: Daniel P. Berrangé 
> > ---
> >  target/i386/cpu-dump.c | 325 ++---
> >  target/i386/cpu.c  |   2 +-
> >  target/i386/cpu.h  |   2 +-
> >  3 files changed, 174 insertions(+), 155 deletions(-)
> > 
> > diff --git a/target/i386/cpu-dump.c b/target/i386/cpu-dump.c
> > index 02b635a52c..8e19485a20 100644
> > --- a/target/i386/cpu-dump.c
> > +++ b/target/i386/cpu-dump.c
> > @@ -94,41 +94,45 @@ static const char *cc_op_str[CC_OP_NB] = {
> >  };
> >  
> >  static void
> > -cpu_x86_dump_seg_cache(CPUX86State *env, FILE *f,
> > +cpu_x86_dump_seg_cache(CPUX86State *env, GString *buf,
> > const char *name, struct SegmentCache *sc)
> >  {
> >  #ifdef TARGET_X86_64
> >  if (env->hflags & HF_CS64_MASK) {
> > -qemu_fprintf(f, "%-3s=%04x %016" PRIx64 " %08x %08x", name,
> > - sc->selector, sc->base, sc->limit,
> > - sc->flags & 0x0000);
> > +g_string_append_printf(buf, "%-3s=%04x %016" PRIx64 " %08x %08x", 
> > name,
> > +   sc->selector, sc->base, sc->limit,
> > +   sc->flags & 0x0000);
> 
> Did you consider using open_memstream() to get a FILE* that can then
> be passed into these callbacks unchanged, rather than rewriting all
> the callbacks to a new signature?

That is certainly an option, but it wouldn't eliminate the need to do
a rewrite. I would still want to replace qemu_fprintf with fprintf in
that scenario. It is desirable to be able to eliminate the QEMU
specific printf wrappers which only exist because they need to treat
a NULL FILE* object as an indication to output to the HMP chardev.
Admittedly that would result in shorter lines than today.

> Then again, I like the GString signature better than FILE*, even if it
> makes for longer lines.

Yes, the verbosity is not ideal. I like the GString API as a general
purpose API for formatting text output to a buffer overall.

I don't feel too strongly either way though, as long as we get to a place
where we eliminate the custom QEMU printf wrappers that integrate with
the monitor APIs.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH v1 2/3] io: Add zerocopy and errqueue

2021-09-08 Thread Daniel P . Berrangé

On Wed, Sep 08, 2021 at 05:09:33PM -0400, Peter Xu wrote:
> On Wed, Sep 08, 2021 at 05:25:50PM -0300, Leonardo Bras Soares Passos wrote:
> > On Tue, Sep 7, 2021 at 8:06 AM Dr. David Alan Gilbert
> >  wrote:
> > > > Possibly, yes. This really need David G's input since he understands
> > > > the code in way more detail than me.
> > >
> > > Hmm I'm not entirely sure why we have the sync after each iteration;
> > > the case I can think of is if we're doing async sending, we could have
> > > two versions of the same page in flight (one from each iteration) -
> > > you'd want those to get there in the right order.
> > >
> > > Dave
> > 
> > Well, that's the thing: as we don't copy the buffer in MSG_ZEROCOPY,  we 
> > will in
> > fact have the same page in flight twice, instead of two versions,
> > given the buffer is
> > sent as it is during transmission.
> 
> That's an interesting point, which looks even valid... :)
> 
> There can still be two versions depending on when the page is read and feed to
> the NICs as the page can be changing during the window, but as long as the
> latter sent page always lands later than the former page on dest node then it
> looks ok.

The really strange scenario is around TCP re-transmits. The kernel may
transmit page version 1, then we have version 2 written. The kerenl now
needs to retransmit a packet due to network loss. The re-transmission will
contain version 2 payload which differs from the originally lost packet's
payload.

IOW, the supposed "reliable" TCP stream is no longer reliable and actually
changes its data retrospectively because we've intentionally violated the
rules the kernel documented for use of MSG_ZEROCOPY.

We think we're probably ok with migration as we are going to rely on the
face that we eventually pause the guest to stop page changes during the
final switchover. None the less I really strongly dislike the idea of
not honouring the kernel API contract, despite the potential performance
benefits it brings.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH 1/2] elf2dmp: Check curl_easy_setopt() return value

2021-09-08 Thread Viktor Prutyanov

Hi,

On Wed, 1 Sep 2021 17:25:09 +0200
Philippe Mathieu-Daudé  wrote:

> On 9/1/21 4:39 PM, Peter Maydell wrote:
> > Coverity points out that we aren't checking the return value
> > from curl_easy_setopt().
> > 
> > Fixes: Coverity CID 1458895
> > Signed-off-by: Peter Maydell 
> > ---
> >  contrib/elf2dmp/download.c | 28 +---
> >  1 file changed, 17 insertions(+), 11 deletions(-)
> > 
> > diff --git a/contrib/elf2dmp/download.c b/contrib/elf2dmp/download.c
> > index d09e607431f..01e4a7fc0dc 100644
> > --- a/contrib/elf2dmp/download.c
> > +++ b/contrib/elf2dmp/download.c
> > @@ -21,21 +21,19 @@ int download_url(const char *name, const char
> > *url) 
> >  file = fopen(name, "wb");
> >  if (!file) {
> > -err = 1;
> > -goto out_curl;
> > +goto fail;
> >  }
> >  
> > -curl_easy_setopt(curl, CURLOPT_URL, url);
> > -curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL);
> > -curl_easy_setopt(curl, CURLOPT_WRITEDATA, file);
> > -curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
> > -curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0);
> > +if (curl_easy_setopt(curl, CURLOPT_URL, url) != CURLE_OK ||
> > +curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL) !=
> > CURLE_OK ||
> > +curl_easy_setopt(curl, CURLOPT_WRITEDATA, file) !=
> > CURLE_OK ||
> > +curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1) !=
> > CURLE_OK ||
> > +curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0) != CURLE_OK)
> > {
> > +goto fail;
> > +}
> >  
> >  if (curl_easy_perform(curl) != CURLE_OK) {
> > -err = 1;
> > -fclose(file);
> > -unlink(name);
> > -goto out_curl;
> > +goto fail;
> >  }
> >  
> >  err = fclose(file);
> > @@ -44,4 +42,12 @@ out_curl:
> >  curl_easy_cleanup(curl);
> >  
> >  return err;
> > +
> > +fail:
> > +err = 1;
> > +if (file) {
> > +fclose(file);
> > +unlink(name);
> > +}
> > +goto out_curl;
> >  }
> >   
> 
> Counter proposal without goto and less ifs:
> 
> -- >8 --  
> @@ -25,21 +25,19 @@ int download_url(const char *name, const char
> *url) goto out_curl;
>  }
> 
> -curl_easy_setopt(curl, CURLOPT_URL, url);
> -curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL);
> -curl_easy_setopt(curl, CURLOPT_WRITEDATA, file);
> -curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
> -curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0);
> -
> -if (curl_easy_perform(curl) != CURLE_OK) {
> -err = 1;
> -fclose(file);
> +if (curl_easy_setopt(curl, CURLOPT_URL, url) != CURLE_OK
> +|| curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL) !=
> CURLE_OK
> +|| curl_easy_setopt(curl, CURLOPT_WRITEDATA, file) !=
> CURLE_OK
> +|| curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1) !=
> CURLE_OK
> +|| curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0) !=
> CURLE_OK
> +|| curl_easy_perform(curl) != CURLE_OK) {
>  unlink(name);
> -goto out_curl;
> +fclose(file);
> +err = 1;
> +} else {
> +err = fclose(file);
>  }
> 
> -err = fclose(file);
> -
>  out_curl:
>  curl_easy_cleanup(curl);
> 
> ---
> 

Honestly, I would prefer this version over the original patch.
In any way, I have tested both of them.

-- 
Viktor Prutyanov

Re: [PATCH 2/2] elf2dmp: Fail cleanly if PDB file specifies zero block_size

2021-09-08 Thread Viktor Prutyanov

Hi,

On Wed,  1 Sep 2021 15:39:10 +0100
Peter Maydell  wrote:

> Coverity points out that if the PDB file we're trying to read
> has a header specifying a block_size of zero then we will
> end up trying to divide by zero in pdb_ds_read_file().
> Check for this and fail cleanly instead.
> 
> Fixes: Coverity CID 1458869
> Signed-off-by: Peter Maydell 
> ---
>  contrib/elf2dmp/pdb.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/contrib/elf2dmp/pdb.c b/contrib/elf2dmp/pdb.c
> index b3a65470680..adcfa7e154c 100644
> --- a/contrib/elf2dmp/pdb.c
> +++ b/contrib/elf2dmp/pdb.c
> @@ -215,6 +215,10 @@ out_symbols:
>  
>  static int pdb_reader_ds_init(struct pdb_reader *r, PDB_DS_HEADER
> *hdr) {
> +if (hdr->block_size == 0) {
> +return 1;
> +}
> +
>  memset(r->file_used, 0, sizeof(r->file_used));
>  r->ds.header = hdr;
>  r->ds.toc = pdb_ds_read(hdr, (uint32_t *)((uint8_t *)hdr +

Looks good.

Reviewed-by: Viktor Prutyanov 

-- 
Viktor Prutyanov

Re: [PATCH v1 2/3] io: Add zerocopy and errqueue

2021-09-08 Thread Peter Xu

On Wed, Sep 08, 2021 at 05:25:50PM -0300, Leonardo Bras Soares Passos wrote:
> On Tue, Sep 7, 2021 at 8:06 AM Dr. David Alan Gilbert
>  wrote:
> > > Possibly, yes. This really need David G's input since he understands
> > > the code in way more detail than me.
> >
> > Hmm I'm not entirely sure why we have the sync after each iteration;
> > the case I can think of is if we're doing async sending, we could have
> > two versions of the same page in flight (one from each iteration) -
> > you'd want those to get there in the right order.
> >
> > Dave
> 
> Well, that's the thing: as we don't copy the buffer in MSG_ZEROCOPY,  we will 
> in
> fact have the same page in flight twice, instead of two versions,
> given the buffer is
> sent as it is during transmission.

That's an interesting point, which looks even valid... :)

There can still be two versions depending on when the page is read and feed to
the NICs as the page can be changing during the window, but as long as the
latter sent page always lands later than the former page on dest node then it
looks ok.

-- 
Peter Xu

Re: [PATCH v1 2/3] io: Add zerocopy and errqueue

2021-09-08 Thread Peter Xu

On Wed, Sep 08, 2021 at 05:13:40PM -0300, Leonardo Bras Soares Passos wrote:
> On Tue, Sep 7, 2021 at 1:44 PM Peter Xu  wrote:
> >
> > On Thu, Sep 02, 2021 at 03:59:25AM -0300, Leonardo Bras Soares Passos wrote:
> > > I also suggested something like that, but I thought it could be good if 
> > > we could
> > > fall back to io_writev() if we didn't have the zerocopy feature (or
> > > the async feature).
> > >
> > > What do you think?
> >
> > That fallback looks safe and ok, I'm just not sure whether it'll be of great
> > help.  E.g. if we provide an QIO api that allows both sync write and 
> > zero-copy
> > write (then we do the fallback when necessary), it means the buffer 
> > implication
> > applies too to this api, so it's easier to me to just detect the zero copy
> > capability and use one alternative.  Thanks,
> >
> > --
> > Peter Xu
> >
> 
> I was thinking this way (async method with fallback) we would allow code using
> the QIO api to just try to use the io_async_writev() whenever the code
> seems fit, and
> let the QIO channel layer to decide when it can be used
> (implementation provides,
> features available), and just fallback to io_writev() when it can't be used.

Sure, I think it depends on whether the whole sync/async interface will be the
same, e.g., when async api needs some callback registered then the interface
won't be the same, and the caller will be aware of that anyways.  Otherwise it
looks fine indeed.  Thanks,

-- 
Peter Xu

Re: Compatibility between -device sga and -machine graphics=off

2021-09-08 Thread Gerd Hoffmann

On Wed, Sep 08, 2021 at 05:08:08PM +0100, Daniel P. Berrangé wrote:
> Given the libvirt XML snippet
> 
>   
> ...
> 
> ...
>   
> 
> libvirt QEMU driver will always format
> 
>   -device sga
> 
> Libguestfs uses this syntax, so we need to make sure it still works
> in future even if 'sga' is disabled or removed in a QEMU build in
> favour of SeaBIOS' built-in support.

Just replacing '-device sga' with '-machine graphics=off' in case sga is
not available should work fine.

serial console support in seabios is available for quite a while
(merged in 2017, seabios 1.11 in rhel-7 has it), so switching over
unconditionally is possibly an option too.  Not sure what the libvirt
backward compatibility policy is though.

>  1. Graphical display only, no serial port, BIOS to graphical display
>  2. Serial port only, no graphics, BIOS to nowhere
>  3. Serial port only, no graphics, BIOS to serial
>  4. Graphical display, serial port, BIOS only to graphical display
>  5. Graphical display, serial port, BIOS to graphical display + serial

Should all work fine.

> If I do 'info mtree' though, I do see a different memory layout
> when changing from SGA to graphics=off

> -000cb000-000cdfff (prio 1000, ram): alias kvmvapic-rom 
> @pc.ram 000cb000-000cdfff
> +000ca000-000ccfff (prio 1000, ram): alias kvmvapic-rom 
> @pc.ram 000ca000-000ccfff

probably sgabios.bin is loaded to ca000 when enabled.

> On non-x86 emulators I see graphics=off has semantic effects beyond
> just controlling whether the firmware prints to the serial or not
> though.

It's been a while, but yes, IIRC on ppc this is passed to the linux
kernel somehow (device tree?) so it also affects the default console
device used by linux.

But sgabios is x86-only anyway so that should not be a problem here.

HTH & take care,
  Gerd

Re: [PATCH 2/2] vfio/common: Add trace point when a MMIO RAM section less than PAGE_SIZE

2021-09-08 Thread Alex Williamson

On Fri, 3 Sep 2021 17:36:11 +0800
Kunkun Jiang  wrote:

> The MSI-X structures of some devices and other non-MSI-X structures
> are in the same BAR. They may share one host page, especially in the
> case of large page granularity, suck as 64K.

s/suck/such/

> For example, MSIX-Table size of 82599 NIC is 0x30 and the offset in
> Bar 3(size 64KB) is 0x0. If host page size is 64KB.
> vfio_listenerregion_add() will be called to map the remaining range

s/vfio_listenerregion_add/vfio_listener_region_add/

> (0x30-0x). And it will return early at
> 'int128_ge((int128_make64(iova), llend))' and hasn't any message.
> Let's add a trace point to informed users like 5c08600547c did.

Please use the following syntax for referencing previous commits
(12-char sha1 is standard):

  commit 5c08600547c0 ("vfio: Use a trace point when a RAM section
  cannot be DMA mapped")

Thanks,
Alex

> Signed-off-by: Kunkun Jiang 
> ---
>  hw/vfio/common.c | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 8728d4d5c2..2fc6213c0f 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -892,6 +892,13 @@ static void vfio_listener_region_add(MemoryListener 
> *listener,
>  llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
>  
>  if (int128_ge(int128_make64(iova), llend)) {
> +if (memory_region_is_ram_device(section->mr)) {
> +trace_vfio_listener_region_add_no_dma_map(
> +memory_region_name(section->mr),
> +section->offset_within_address_space,
> +int128_getlo(section->size),
> +qemu_real_host_page_size);
> +}
>  return;
>  }
>  end = int128_get64(int128_sub(llend, int128_one()));

Re: [PATCH 1/2] vfio/pci: Fix vfio-pci sub-page MMIO BAR mmaping in live migration

2021-09-08 Thread Alex Williamson

On Fri, 3 Sep 2021 17:36:10 +0800
Kunkun Jiang  wrote:

> We expand MemoryRegions of vfio-pci sub-page MMIO BARs to
> vfio_pci_write_config to improve IO performance.
> The MemoryRegions of destination VM will not be expanded
> successful in live migration, because their addresses have
> been updated in vmstate_load_state (vfio_pci_load_config).

What's the call path through vfio_pci_write_config() that you're
relying on to get triggered to enable this and why wouldn't we just
walk all sub-page BARs in vfio_pci_load_config() to resolve the issue
then?  It's my understanding that we do this update in write-config
because it's required that the VM sizes the BAR before using it, which
is not the case when we resume from migration.  Thanks,

Alex
 
> Remove the restriction on base address change in
> vfio_pci_write_config for correct mmapping sub-page MMIO
> BARs. Accroding to my analysis, the remaining parameter
> verification is enough.
> 
> Fixes: c5e2fb3ce4d (vfio: Add save and load functions for VFIO PCI devices)
> Reported-by: Nianyao Tang 
> Reported-by: Qixin Gan 
> Signed-off-by: Kunkun Jiang 
> ---
>  hw/vfio/pci.c | 8 +---
>  1 file changed, 1 insertion(+), 7 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index e1ea1d8a23..891b211ddf 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1189,18 +1189,12 @@ void vfio_pci_write_config(PCIDevice *pdev,
>  }
>  } else if (ranges_overlap(addr, len, PCI_BASE_ADDRESS_0, 24) ||
>  range_covers_byte(addr, len, PCI_COMMAND)) {
> -pcibus_t old_addr[PCI_NUM_REGIONS - 1];
>  int bar;
>  
> -for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
> -old_addr[bar] = pdev->io_regions[bar].addr;
> -}
> -
>  pci_default_write_config(pdev, addr, val, len);
>  
>  for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
> -if (old_addr[bar] != pdev->io_regions[bar].addr &&
> -vdev->bars[bar].region.size > 0 &&
> +if (vdev->bars[bar].region.size > 0 &&
>  vdev->bars[bar].region.size < qemu_real_host_page_size) {
>  vfio_sub_page_bar_update_mapping(pdev, bar);
>  }

[PATCH] vmdk: allow specification of tools version

2021-09-08 Thread Thomas Weißschuh

VMDK files support an attribute that represents the version of the guest
tools that are installed on the disk.
This attribute is used by vSphere before a machine has been started to
determine if the VM has the guest tools installed.
This is important when configuring "Operating system customizations" in
vSphere, as it checks for the presence of the guest tools before
allowing those customizations.
Thus when the VM has not yet booted normally it would be impossible to
customize it, therefore preventing a customized first-boot.

The attribute should not hurt on disks that do not have the guest tools
installed and indeed the VMware tools also unconditionally add this
attribute.
(Defaulting to the value "2147483647", as is done in this patch)

Signed-off-by: Thomas Weißschuh 
---
 block/vmdk.c | 24 
 qapi/block-core.json |  2 ++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 4499f136bd..93ef6426b0 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -60,6 +60,7 @@
 #define VMDK_ZEROED  (-3)
 
 #define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain"
+#define BLOCK_OPT_TOOLSVERSION "toolsversion"
 
 typedef struct {
 uint32_t version;
@@ -2344,6 +2345,7 @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
   BlockdevVmdkAdapterType adapter_type,
   const char *backing_file,
   const char *hw_version,
+  const char *toolsversion,
   bool compat6,
   bool zeroed_grain,
   vmdk_create_extent_fn extent_fn,
@@ -2384,7 +2386,8 @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
 "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
 "ddb.geometry.heads = \"%" PRIu32 "\"\n"
 "ddb.geometry.sectors = \"63\"\n"
-"ddb.adapterType = \"%s\"\n";
+"ddb.adapterType = \"%s\"\n"
+"ddb.toolsVersion = \"%s\"\n";
 
 ext_desc_lines = g_string_new(NULL);
 
@@ -2401,6 +2404,9 @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
 if (!hw_version) {
 hw_version = "4";
 }
+if (!toolsversion) {
+toolsversion = "2147483647";
+}
 
 if (adapter_type != BLOCKDEV_VMDK_ADAPTER_TYPE_IDE) {
 /* that's the number of heads with which vmware operates when
@@ -2525,7 +2531,8 @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
size /
(int64_t)(63 * number_heads * BDRV_SECTOR_SIZE),
number_heads,
-   BlockdevVmdkAdapterType_str(adapter_type));
+   BlockdevVmdkAdapterType_str(adapter_type),
+   toolsversion);
 desc_len = strlen(desc);
 /* the descriptor offset = 0x200 */
 if (!split && !flat) {
@@ -2617,6 +2624,7 @@ static int coroutine_fn vmdk_co_create_opts(BlockDriver 
*drv,
 BlockdevVmdkAdapterType adapter_type_enum;
 char *backing_file = NULL;
 char *hw_version = NULL;
+char *toolsversion = NULL;
 char *fmt = NULL;
 BlockdevVmdkSubformat subformat;
 int ret = 0;
@@ -2649,6 +2657,7 @@ static int coroutine_fn vmdk_co_create_opts(BlockDriver 
*drv,
 adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE);
 backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
 hw_version = qemu_opt_get_del(opts, BLOCK_OPT_HWVERSION);
+toolsversion = qemu_opt_get_del(opts, BLOCK_OPT_TOOLSVERSION);
 compat6 = qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false);
 if (strcmp(hw_version, "undefined") == 0) {
 g_free(hw_version);
@@ -2692,14 +2701,15 @@ static int coroutine_fn vmdk_co_create_opts(BlockDriver 
*drv,
 .opts = opts,
 };
 ret = vmdk_co_do_create(total_size, subformat, adapter_type_enum,
-backing_file, hw_version, compat6, zeroed_grain,
-vmdk_co_create_opts_cb, , errp);
+backing_file, hw_version, toolsversion, compat6,
+zeroed_grain, vmdk_co_create_opts_cb, , errp);
 
 exit:
 g_free(backing_fmt);
 g_free(adapter_type);
 g_free(backing_file);
 g_free(hw_version);
+g_free(toolsversion);
 g_free(fmt);
 g_free(desc);
 g_free(path);
@@ -2782,6 +2792,7 @@ static int coroutine_fn 
vmdk_co_create(BlockdevCreateOptions *create_options,
 opts->adapter_type,
 opts->backing_file,
 opts->hwversion,
+opts->toolsversion,
 false,
 opts->zeroed_grain,
 vmdk_co_create_cb,
@@ -3031,6 +3042,11 @@ static QemuOptsList

Re: [PATCH v1 2/3] io: Add zerocopy and errqueue

2021-09-08 Thread Leonardo Bras Soares Passos

On Tue, Sep 7, 2021 at 8:06 AM Dr. David Alan Gilbert
 wrote:
> > Possibly, yes. This really need David G's input since he understands
> > the code in way more detail than me.
>
> Hmm I'm not entirely sure why we have the sync after each iteration;
> the case I can think of is if we're doing async sending, we could have
> two versions of the same page in flight (one from each iteration) -
> you'd want those to get there in the right order.
>
> Dave

Well, that's the thing: as we don't copy the buffer in MSG_ZEROCOPY,  we will in
fact have the same page in flight twice, instead of two versions,
given the buffer is
sent as it is during transmission.

For me it looks like there will be no change in the current algorithm,
but I am still
a beginner in migration code, and I am probably missing something.

Best regards,
Leo

Re: [PATCH] tcg/arm: Fix tcg_out_vec_op function signature

2021-09-08 Thread Philippe Mathieu-Daudé

On 9/8/21 8:53 PM, Jose R. Ziviani wrote:
> Commit 5e8892db93 fixed several function signatures but tcg_out_vec_op
> for arm is missing. It causes a build error on armv6 and armv7:
> 
> tcg-target.c.inc:2718:42: error: argument 5 of type 'const TCGArg *'
> {aka 'const unsigned int *'} declared as a pointer [-Werror=array-parameter=]
>const TCGArg *args, const int *const_args)
>   ~~^~~~
> ../tcg/tcg.c:120:41: note: previously declared as an array 'const TCGArg[16]'
> {aka 'const unsigned int[16]'}
>const TCGArg args[TCG_MAX_OP_ARGS],
>   ~~^~~~
> 
> Signed-off-by: Jose R. Ziviani 
> ---
>  tcg/arm/tcg-target.c.inc | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
> index 007ceee68e..e5b4f86841 100644
> --- a/tcg/arm/tcg-target.c.inc
> +++ b/tcg/arm/tcg-target.c.inc
> @@ -2715,7 +2715,8 @@ static const ARMInsn vec_cmp0_insn[16] = {
>  
>  static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
> unsigned vecl, unsigned vece,
> -   const TCGArg *args, const int *const_args)
> +   const TCGArg args[TCG_MAX_OP_ARGS],
> +   const int const_args[TCG_MAX_OP_ARGS])
>  {
>  TCGType type = vecl + TCG_TYPE_V64;
>  unsigned q = vecl;
> 

Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH] docs/devel: memory: Document MemoryRegionOps requirement

2021-09-08 Thread Philippe Mathieu-Daudé

On 9/8/21 8:50 PM, Peter Xu wrote:
> On Mon, Sep 06, 2021 at 03:01:54PM +0200, Philippe Mathieu-Daudé wrote:
>> On 9/6/21 2:20 PM, Bin Meng wrote:
>>> It's been a requirement that at least one function pointer for read
>>> and one for write are provided ever since the MemoryRegion APIs were
>>> introduced in 2012.
>>>
>>> Signed-off-by: Bin Meng 
>>> ---
>>>
>>>  docs/devel/memory.rst | 5 +
>>>  1 file changed, 5 insertions(+)
>>>
>>> diff --git a/docs/devel/memory.rst b/docs/devel/memory.rst
>>> index 5dc8a12682..7b589b21d2 100644
>>> --- a/docs/devel/memory.rst
>>> +++ b/docs/devel/memory.rst
>>> @@ -344,6 +344,11 @@ based on the attributes used for the memory 
>>> transaction, or need
>>>  to be able to respond that the access should provoke a bus error
>>>  rather than completing successfully; those devices can use the
>>>  ->read_with_attrs() and ->write_with_attrs() callbacks instead.
>>> +The requirement for a device's MemoryRegionOps is that at least
>>> +one callback for read and one for write are provided. If both
>>> +->read() and ->read_with_attrs() are provided, the plain ->read()
>>> +version takes precedence over the with_attrs() version. So does
>>> +the write callback.
>>
>> What about also adding a runtime check?
>>
>> -- >8 --
>> diff --git a/softmmu/memory.c b/softmmu/memory.c
>> index bfedaf9c4df..8ab602d3379 100644
>> --- a/softmmu/memory.c
>> +++ b/softmmu/memory.c
>> @@ -1516,6 +1516,17 @@ MemTxResult
>> memory_region_dispatch_write(MemoryRegion *mr,
>>  }
>>  }
>>
>> +static void memory_region_set_ops(MemoryRegion *mr, const
>> MemoryRegionOps *ops)
>> +{
>> +if (ops) {
>> +assert(ops->valid.accepts || (ops->read || ops->read_with_attrs));
>> +assert(ops->valid.accepts || (ops->write ||
>> ops->write_with_attrs));
> 
> Curious why accepts() matters.. Say, if there's only accepts() provided and it
> returned true, then I think we still can't avoid the coredump when read/write?

Good point :(

> I'm also curious what's the issue that Paolo mentioned here:
> 
> https://lore.kernel.org/qemu-devel/8da074de-7dff-6505-5180-720cf2f47...@redhat.com/
> 
> I believe Paolo was referring to this series from Prasad:
> 
> https://lore.kernel.org/qemu-devel/2020084133.672647-10-ppan...@redhat.com/
> 
> We may need to solve that issue then maybe we can consider revive Prasad's
> patchset?
>

Re: [PATCH v1 2/3] io: Add zerocopy and errqueue

2021-09-08 Thread Leonardo Bras Soares Passos

On Tue, Sep 7, 2021 at 1:44 PM Peter Xu  wrote:
>
> On Thu, Sep 02, 2021 at 03:59:25AM -0300, Leonardo Bras Soares Passos wrote:
> > I also suggested something like that, but I thought it could be good if we 
> > could
> > fall back to io_writev() if we didn't have the zerocopy feature (or
> > the async feature).
> >
> > What do you think?
>
> That fallback looks safe and ok, I'm just not sure whether it'll be of great
> help.  E.g. if we provide an QIO api that allows both sync write and zero-copy
> write (then we do the fallback when necessary), it means the buffer 
> implication
> applies too to this api, so it's easier to me to just detect the zero copy
> capability and use one alternative.  Thanks,
>
> --
> Peter Xu
>

I was thinking this way (async method with fallback) we would allow code using
the QIO api to just try to use the io_async_writev() whenever the code
seems fit, and
let the QIO channel layer to decide when it can be used
(implementation provides,
features available), and just fallback to io_writev() when it can't be used.

Best regards,
Leo

Re: [RFC v3 13/32] rust: use vendored-sources

2021-09-08 Thread Marc-André Lureau

Hi

On Wed, Sep 8, 2021 at 8:51 PM Ian Jackson  wrote:

> Marc-André Lureau writes ("Re: [RFC v3 13/32] rust: use vendored-sources"):
> > Yes, this is the shim to provide a C ABI QMP handler from Rust. This is
> where
> > all the FFI<->Rust conversion takes place.
> >
> > The "safe" code is qga/qmp/vcpus.rs. However, there is no
> > documentation there, since it's not meant to be the public
> > interface. It's documented with the QAPI schema.
>
> Right, thanks.  That does look like a PoC of a Rust API.  I wanted the
> rustdoc output because I find it provides a very uniform and readable
> presentation even of an API with no doc comments.
>
> I think maybe a thing I am missing is how you expect this to be used.
> Which parts of the system are going to be in Rust.  etc.
> And that would help explain what "public" means.
>
> I think the answer is probably in this example:
>
>
> https://patchew.org/QEMU/20210907121943.3498701-1-marcandre.lur...@redhat.com/20210907121943.3498701-30-marcandre.lur...@redhat.com/
>
> but although my C and Rust are both fine, I don't understand qemu well
> enough to make sense of it.
>
> ... wait, qga is "qemu guest agent" ?
>
> I think I am sort of seeing this use case now.  But presuambly there
> are other use cases for this QMP/QAPI type bridge stuff.
>
> Sorry to be asking such stupid questions.
>

There is no magic wand to introduce Rust code in an existing C code base.
You need to glue some C ABI to/from Rust. It's a lot of manual work to
properly bind a C API to Rust (it's a project on its own I would say).
Typically, FFI bindings can be automated from headers, and high-level Rust
bindings are done by hand. Then you want high-level bindings to take
advantage of Rust, for idiomatic and safe code. Various internal QEMU API
will have to be bound by hand to start using them from Rust. An isolated
unit (say a parser, a function) could be rewritten in Rust and a C ABI be
provided without much hassle. But in general, code is quickly
interdependent, or the amount of stuff to rewrite in one go is large and
risky to do it that way.

In the glib/gobject world, the ABI are annotated, and you can automate much
of the high-level binding process (which is amazing, given the complexity
of the APIs, with objects, async methods, signals, properties, .. various
concepts that don't match easily in Rust). To help with this process, they
introduced conversion traits (the ToQemu/FromQemu adapted here), common
interfaces, to help automate and compose complex types (and their own
binding generator).

(Unfortunately) QEMU doesn't use gobject, but it relies heavily on two type
systems of its own: QAPI and QOM. QAPI is actually more of an IDL, which
translates C from/to JSON/QMP and has commands and signals (and is the
protocol used to communicate with qemu or qemu-ga etc). A large part of
QEMU are direct users of the QAPI generated types and functions. It is thus
a good target to generate bindings automatically. As demonstrated at the
end, it allows writing QMP handlers in idiomatic Rust. Since
qemu-guest-agent doesn't have a complex internal state (most commands are
really independent, they could be different programs..!), I started
rewriting some handlers there. It feels relatively straightforward to
rewrite in Rust, and we could imagine a complete rewrite of qemu-ga...
However, it is less of a waste to focus on critical parts or newly added
code instead, imho! Furthermore, the Rust doesn't cover all targets C can
currently target, so we must have some care when deciding a language. (you
can imagine I would encourage anyone to do it in Rust!)

QOM is the QEMU object system and is spread throughout the code. If there
is enough interest for this Rust effort, I plan to look at binding it next
(based on my experience working on GObject bindings). I am afraid we are
not going to have as friendly bindings as what the GNOME team achieved, but
since QOM is a bit simpler, we may find acceptable compromises.

With QOM & QAPI, and manual bindings of internal APIs, I think we could
start writing interesting code in Rust, simple devices, external interfaces
etc.

 Hope that helps clarify a bit the current goals

-- 
Marc-André Lureau

Re: Compatibility between -device sga and -machine graphics=off

2021-09-08 Thread Paolo Bonzini

Oh, that's cool. It must be part of the kvmvapic migration data. Still,
there are very likely some rare cases that would break (on any machine),
e.g. if migrating while seabios is accessing the list of option roms.

Paolo

Il mer 8 set 2021, 18:36 Daniel P. Berrangé  ha
scritto:

> On Wed, Sep 08, 2021 at 06:28:01PM +0200, Paolo Bonzini wrote:
> > On 08/09/21 18:08, Daniel P. Berrangé wrote:
> > > Despite this difference, I was able migrate from a x86 guest using SGA
> > > to a guest using graphics=off without errors being reported. So it
> > > doesn't seem to change the migration data sections sent on the wire
> > > at least.
> >
> > It would probably break with Windows XP/2003 guests on AMD processors, as
> > those are the ones that use kvmvapic.  On other guests and processors,
> it's
> > a no-op.
>
> After running an incoming migrate, the target QEMU's  'info mtree'
> changed to reflect what the src QEMU originally reported.  So whatever
> difference 'graphics=off' causes compared to -device sga initially,
> appears to have been eliminated by running the migrate. Not sure if
> that's true in the general case or not - I'm just testing with a
> minimalist CLI and no real OS running
>
>  qemu-system-x86_64 -nodefaults -display sdl -device VGA -serial stdio
> -device sga
>
> vs
>
>  qemu-system-x86_64 -nodefaults -display sdl -device VGA -serial stdio
> -machine graphics=off
>
> Regards,
> Daniel
> --
> |: https://berrange.com  -o-
> https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-
> https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-
> https://www.instagram.com/dberrange :|
>
>

[PATCH] tcg/arm: Fix tcg_out_vec_op function signature

2021-09-08 Thread Jose R. Ziviani

Commit 5e8892db93 fixed several function signatures but tcg_out_vec_op
for arm is missing. It causes a build error on armv6 and armv7:

tcg-target.c.inc:2718:42: error: argument 5 of type 'const TCGArg *'
{aka 'const unsigned int *'} declared as a pointer [-Werror=array-parameter=]
   const TCGArg *args, const int *const_args)
  ~~^~~~
../tcg/tcg.c:120:41: note: previously declared as an array 'const TCGArg[16]'
{aka 'const unsigned int[16]'}
   const TCGArg args[TCG_MAX_OP_ARGS],
  ~~^~~~

Signed-off-by: Jose R. Ziviani 
---
 tcg/arm/tcg-target.c.inc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 007ceee68e..e5b4f86841 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -2715,7 +2715,8 @@ static const ARMInsn vec_cmp0_insn[16] = {
 
 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
unsigned vecl, unsigned vece,
-   const TCGArg *args, const int *const_args)
+   const TCGArg args[TCG_MAX_OP_ARGS],
+   const int const_args[TCG_MAX_OP_ARGS])
 {
 TCGType type = vecl + TCG_TYPE_V64;
 unsigned q = vecl;
-- 
2.33.0

Re: [PATCH] docs/devel: memory: Document MemoryRegionOps requirement

2021-09-08 Thread Peter Xu

On Mon, Sep 06, 2021 at 03:01:54PM +0200, Philippe Mathieu-Daudé wrote:
> On 9/6/21 2:20 PM, Bin Meng wrote:
> > It's been a requirement that at least one function pointer for read
> > and one for write are provided ever since the MemoryRegion APIs were
> > introduced in 2012.
> > 
> > Signed-off-by: Bin Meng 
> > ---
> > 
> >  docs/devel/memory.rst | 5 +
> >  1 file changed, 5 insertions(+)
> > 
> > diff --git a/docs/devel/memory.rst b/docs/devel/memory.rst
> > index 5dc8a12682..7b589b21d2 100644
> > --- a/docs/devel/memory.rst
> > +++ b/docs/devel/memory.rst
> > @@ -344,6 +344,11 @@ based on the attributes used for the memory 
> > transaction, or need
> >  to be able to respond that the access should provoke a bus error
> >  rather than completing successfully; those devices can use the
> >  ->read_with_attrs() and ->write_with_attrs() callbacks instead.
> > +The requirement for a device's MemoryRegionOps is that at least
> > +one callback for read and one for write are provided. If both
> > +->read() and ->read_with_attrs() are provided, the plain ->read()
> > +version takes precedence over the with_attrs() version. So does
> > +the write callback.
> 
> What about also adding a runtime check?
> 
> -- >8 --
> diff --git a/softmmu/memory.c b/softmmu/memory.c
> index bfedaf9c4df..8ab602d3379 100644
> --- a/softmmu/memory.c
> +++ b/softmmu/memory.c
> @@ -1516,6 +1516,17 @@ MemTxResult
> memory_region_dispatch_write(MemoryRegion *mr,
>  }
>  }
> 
> +static void memory_region_set_ops(MemoryRegion *mr, const
> MemoryRegionOps *ops)
> +{
> +if (ops) {
> +assert(ops->valid.accepts || (ops->read || ops->read_with_attrs));
> +assert(ops->valid.accepts || (ops->write ||
> ops->write_with_attrs));

Curious why accepts() matters.. Say, if there's only accepts() provided and it
returned true, then I think we still can't avoid the coredump when read/write?

I'm also curious what's the issue that Paolo mentioned here:

https://lore.kernel.org/qemu-devel/8da074de-7dff-6505-5180-720cf2f47...@redhat.com/

I believe Paolo was referring to this series from Prasad:

https://lore.kernel.org/qemu-devel/2020084133.672647-10-ppan...@redhat.com/

We may need to solve that issue then maybe we can consider revive Prasad's
patchset?

-- 
Peter Xu

Re: [PATCH 01/10] aspeed/smc: Add watchdog Control/Status Registers

2021-09-08 Thread Peter Delevoryas



> On Sep 6, 2021, at 11:58 PM, Cédric Le Goater  wrote:
> 
> The Aspeed SoCs have a dual boot function for firmware fail-over
> recovery. The system auto-reboots from the second flash if the main
> flash does not boot sucessfully within a certain amount of time. This
> function is called alternate boot (ABR) in the FMC controllers.
> 
> On AST2400/AST2500, ABR is enabled by hardware strapping in SCU70 to
> enable the 2nd watchdog timer, on AST2600, through register SCU510.
> If the boot on the the main flash succeeds, the firmware should
> disable the 2nd watchdog timer. If not, the BMC is reset and the CE0
> and CE1 mappings are swapped to restart the BMC from the 2nd flash.
> 
> On the AST2600, the registers controlling the 2nd watchdog timer were
> moved from the watchdog register to the FMC controller. Add simple
> read/write handlers for these to let the FW disable the 2nd watchdog
> without error.
> 
> Reported-by: Peter Delevoryas 
> Signed-off-by: Cédric Le Goater 
> ---
> hw/ssi/aspeed_smc.c | 16 
> 1 file changed, 16 insertions(+)
> 
> diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c
> index 331a2c544635..c9990f069ea4 100644
> --- a/hw/ssi/aspeed_smc.c
> +++ b/hw/ssi/aspeed_smc.c
> @@ -124,6 +124,13 @@
> /* SPI dummy cycle data */
> #define R_DUMMY_DATA  (0x54 / 4)
> 
> +/* FMC_WDT2 Control/Status Register for Alternate Boot (AST2600) */
> +#define R_FMC_WDT2_CTRL   (0x64 / 4)
> +#define   FMC_WDT2_CTRL_ALT_BOOT_MODEBIT(6) /* O: 2 chips 1: 1 chip */
> +#define   FMC_WDT2_CTRL_SINGLE_BOOT_MODE BIT(5)
> +#define   FMC_WDT2_CTRL_BOOT_SOURCE  BIT(4) /* O: primary 1: alternate */
> +#define   FMC_WDT2_CTRL_EN   BIT(0)
> +
> /* DMA Control/Status Register */
> #define R_DMA_CTRL(0x80 / 4)
> #define   DMA_CTRL_REQUEST  (1 << 31)
> @@ -263,12 +270,18 @@ static void aspeed_2600_smc_dma_ctrl(AspeedSMCState *s, 
> uint32_t value);
> 
> #define ASPEED_SMC_FEATURE_DMA   0x1
> #define ASPEED_SMC_FEATURE_DMA_GRANT 0x2
> +#define ASPEED_SMC_FEATURE_WDT_CONTROL 0x4
> 
> static inline bool aspeed_smc_has_dma(const AspeedSMCState *s)
> {
> return !!(s->ctrl->features & ASPEED_SMC_FEATURE_DMA);
> }
> 
> +static inline bool aspeed_smc_has_wdt_control(const AspeedSMCState *s)
> +{
> +return !!(s->ctrl->features & ASPEED_SMC_FEATURE_WDT_CONTROL);
> +}
> +
> static const AspeedSMCController controllers[] = {
> {
> .name  = "aspeed.smc-ast2400",
> @@ -1019,6 +1032,7 @@ static uint64_t aspeed_smc_read(void *opaque, hwaddr 
> addr, unsigned int size)
> addr == R_CE_CMD_CTRL ||
> addr == R_INTR_CTRL ||
> addr == R_DUMMY_DATA ||
> +(aspeed_smc_has_wdt_control(s) && addr == R_FMC_WDT2_CTRL) ||
> (aspeed_smc_has_dma(s) && addr == R_DMA_CTRL) ||
> (aspeed_smc_has_dma(s) && addr == R_DMA_FLASH_ADDR) ||
> (aspeed_smc_has_dma(s) && addr == R_DMA_DRAM_ADDR) ||
> @@ -1350,6 +1364,8 @@ static void aspeed_smc_write(void *opaque, hwaddr addr, 
> uint64_t data,
> s->regs[addr] = value & 0xff;
> } else if (addr == R_DUMMY_DATA) {
> s->regs[addr] = value & 0xff;
> +} else if (aspeed_smc_has_wdt_control(s) && addr == R_FMC_WDT2_CTRL) {
> +s->regs[addr] = value & 0xb;
> } else if (addr == R_INTR_CTRL) {
> s->regs[addr] = value;
> } else if (aspeed_smc_has_dma(s) && addr == R_DMA_CTRL) {
> -- 
> 2.31.1
> 

Looks good to me!

Reviewed-by: Peter Delevoryas 

One thing: should we enable this feature (ASPEED_SMC_FEATURE_WDT_CONTROL)
on any of the SMC controller models? I wanted to test this with the
Fuji image and machine type I added, and I needed the following diff
to enable this:

diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c
index 331a2c5446..b5d835d488 100644
--- a/hw/ssi/aspeed_smc.c
+++ b/hw/ssi/aspeed_smc.c
@@ -388,7 +388,7 @@ static const AspeedSMCController controllers[] = {
 .segments  = aspeed_segments_ast2600_fmc,
 .flash_window_base = ASPEED26_SOC_FMC_FLASH_BASE,
 .flash_window_size = 0x1000,
-.features  = ASPEED_SMC_FEATURE_DMA,
+.features  = ASPEED_SMC_FEATURE_DMA | 
ASPEED_SMC_FEATURE_WDT_CONTROL,
 .dma_flash_mask= 0x0FFC,
 .dma_dram_mask = 0x3FFC,
 .nregs = ASPEED_SMC_R_MAX,

Without this, Fuji would try to clear BIT(0) of R_FMC_WDT2_CTRL,
but then the default aspeed_smc_read() value would return 0xFFF.

Error: unable to disable the 2nd watchdog: FMC_WDT2=0x

And then with these changes added, the writes and reads work
so that BIT(0) appears to have been cleared:

Disabled the 2nd watchdog (FMC_WDT2) successfully.

root@bmc-oob:~# devmem 0x1e620064
0x
root@bmc-oob:~# devmem 0x1e620064 32 0x
root@bmc-oob:~# devmem 0x1e620064
0x000B

I don’t totally understand why the mask for the register is 0xb though?

>>> bin(0xb)
‘0b1011'

This doesn’t seem to match the macro BIT(...)

Re: [PATCH 4/5] qapi: introduce x-query-registers QMP command

2021-09-08 Thread Eric Blake

On Wed, Sep 08, 2021 at 11:37:10AM +0100, Daniel P. Berrangé wrote:
> This is a counterpart to the HMP "info registers" command. It is being
> added with an "x-" prefix because this QMP command is intended as an
> adhoc debugging tool and will thus not be modelled in QAPI as fully

ad hoc

> structured data, nor will it have long term guaranteed stability.
> 
> Signed-off-by: Daniel P. Berrangé 
> ---
>  hw/core/machine-qmp-cmds.c | 28 
>  qapi/machine.json  | 37 +
>  2 files changed, 65 insertions(+)
> 

> +++ b/qapi/machine.json
> @@ -1312,3 +1312,40 @@
>   '*cores': 'int',
>   '*threads': 'int',
>   '*maxcpus': 'int' } }
> +
> +##
> +# @RegisterParams:
> +#
> +# Information about the CPU to query state of
> +#
> +# @cpu: the CPU number to query. If omitted, queries all CPUs
> +#
> +# Since: 6.2.0

Consensus seems to be "6.2", not "6.2.0".

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [PATCH 3/5] target/i386: convert to use format_state instead of dump_state

2021-09-08 Thread Eric Blake

On Wed, Sep 08, 2021 at 11:37:09AM +0100, Daniel P. Berrangé wrote:
> Signed-off-by: Daniel P. Berrangé 
> ---
>  target/i386/cpu-dump.c | 325 ++---
>  target/i386/cpu.c  |   2 +-
>  target/i386/cpu.h  |   2 +-
>  3 files changed, 174 insertions(+), 155 deletions(-)
> 
> diff --git a/target/i386/cpu-dump.c b/target/i386/cpu-dump.c
> index 02b635a52c..8e19485a20 100644
> --- a/target/i386/cpu-dump.c
> +++ b/target/i386/cpu-dump.c
> @@ -94,41 +94,45 @@ static const char *cc_op_str[CC_OP_NB] = {
>  };
>  
>  static void
> -cpu_x86_dump_seg_cache(CPUX86State *env, FILE *f,
> +cpu_x86_dump_seg_cache(CPUX86State *env, GString *buf,
> const char *name, struct SegmentCache *sc)
>  {
>  #ifdef TARGET_X86_64
>  if (env->hflags & HF_CS64_MASK) {
> -qemu_fprintf(f, "%-3s=%04x %016" PRIx64 " %08x %08x", name,
> - sc->selector, sc->base, sc->limit,
> - sc->flags & 0x0000);
> +g_string_append_printf(buf, "%-3s=%04x %016" PRIx64 " %08x %08x", 
> name,
> +   sc->selector, sc->base, sc->limit,
> +   sc->flags & 0x0000);

Did you consider using open_memstream() to get a FILE* that can then
be passed into these callbacks unchanged, rather than rewriting all
the callbacks to a new signature?

Then again, I like the GString signature better than FILE*, even if it
makes for longer lines.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [PATCH] vmdk: allow specification of tools version

2021-09-08 Thread Eric Blake

On Wed, Sep 08, 2021 at 07:42:50PM +0200, Thomas Weißschuh wrote:
> VMDK files support an attribute that represents the version of the guest
> tools that are installed on the disk.
> This attribute is used by vSphere before a machine has been started to
> determine if the VM has the guest tools installed.
> This is important when configuring "Operating system customizations" in
> vSphere, as it checks for the presence of the guest tools before
> allowing those customizations.
> Thus when the VM has not yet booted normally it would be impossible to
> customize it, therefore preventing a customized first-boot.
> 
> The attribute should not hurt on disks that do not have the guest tools
> installed and indeed the VMware tools also unconditionally add this
> attribute.
> (Defaulting to the value "2147483647", as is done in this patch)
> 
> Signed-off-by: Thomas Weißschuh 
> ---
>  block/vmdk.c | 24 
>  qapi/block-core.json |  2 ++
>  2 files changed, 22 insertions(+), 4 deletions(-)

UI review:

> +++ b/qapi/block-core.json
> @@ -4597,6 +4597,7 @@
>  # @adapter-type: The adapter type used to fill in the descriptor. Default: 
> ide.
>  # @hwversion: Hardware version. The meaningful options are "4" or "6".
>  # Default: "4".
> +# @toolsversion: VMware guest tools version.

Missing a '(since 6.2)' blurb, and a description of its default value.

>  # @zeroed-grain: Whether to enable zeroed-grain feature for sparse 
> subformats.
>  #Default: false.
>  #
> @@ -4610,6 +4611,7 @@
>  '*backing-file':'str',
>  '*adapter-type':'BlockdevVmdkAdapterType',
>  '*hwversion':   'str',
> +'*toolsversion':'str',

Is it an arbitrary string, or must a valid value always be parseable
as a numeric value?  If the latter, then make the QMP representation
numeric rather than string.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [PATCH 1/5] docs/devel: document expectations for QAPI data modelling for QMP

2021-09-08 Thread Eric Blake

On Wed, Sep 08, 2021 at 11:37:07AM +0100, Daniel P. Berrangé wrote:
> Traditionally we have required that newly added QMP commands will model
> any returned data using fine grained QAPI types. This is good for
> commands that are intended to be consumed by machines, where clear data
> representation is very important. Commands that don't satisfy this have
> generally been added to HMP only.
> 
> In effect the decision of whether to add a new command to QMP vs HMP has
> been used as a proxy for the decision of whether the cost of designing a
> fine grained QAPI type is justified by the potential benefits.
> 
> As a result the commands present in QMP and HMP are non-overlapping
> sets, although HMP comamnds can be accessed indirectly via the QMP
> command 'human-monitor-command'.
> 
> One of the downsides of 'human-monitor-command' is that the QEMU monitor
> APIs remain tied into various internal parts of the QEMU code. For
> example any exclusively HMP command will need to use 'monitor_printf'
> to get data out. It would be desirable to be able to fully isolate the
> monitor implementation from QEMU internals, however, this is only
> possible if all commands are exclusively based on QAPI with direct
> QMP exposure.
> 
> The way to achieve this desired end goal is to finese the requirements
> for QMP command design. For cases where the output of a command is only
> intended for human consumption, it is reasonable to want to simplify
> the implementation by returning a plain string containing formatted
> data instead of designing a fine grained QAPI data type. This can be
> permitted if-and-only-if the command is exposed under the 'x-' name
> prefix. This indicates that the command data format is liable to
> future change and that it is not following QAPI design best practice.
> 
> The poster child example for this would be the 'info registers' HMP
> command which returns printf formatted data representing CPU state.
> This information varies enourmously across target architectures and
> changes relatively frequently as new CPU features are implemented.
> It is there as debugging data for human operators, and any machine
> usage would treat it as an opaque blob. It is thus reasonable to
> expose this in QMP as 'x-query-registers' returning a 'str' field.
> 
> Signed-off-by: Daniel P. Berrangé 
> ---
>  docs/devel/writing-qmp-commands.rst | 25 +
>  1 file changed, 25 insertions(+)
> 
> diff --git a/docs/devel/writing-qmp-commands.rst 
> b/docs/devel/writing-qmp-commands.rst
> index 6a10a06c48..d032daa62d 100644
> --- a/docs/devel/writing-qmp-commands.rst
> +++ b/docs/devel/writing-qmp-commands.rst
> @@ -350,6 +350,31 @@ In this section we will focus on user defined types. 
> Please, check the QAPI
>  documentation for information about the other types.
>  
>  
> +Modelling data in QAPI
> +~~
> +
> +For a QMP command that to be considered stable and supported long term there
> +is a requirement returned data should be explicitly modelled using fine 
> grained
> +QAPI types. As a general guide, a caller of the QMP command should never need
> +to parse individual returned data fields. If a field appears to need parsing,
> +them it should be split into separate fields corresponding to each distinct

then

> +data item. This should be the common case for any new QMP command that is
> +intended to be used by machines, as opposed to exclusively human operators.
> +
> +Some QMP commands, however, are only intended as adhoc debugging aids for 
> human

ad hoc

> +operators. While they may return large amounts of formatted data, it is not
> +expected that machines will need to parse the result. The overhead of 
> defining
> +a fine grained QAPI type for the data may not be justified by the potential
> +benefit. In such cases, it is permitted to have a command return a simple 
> string
> +that contains formatted data, however, it is mandatory for the command to use
> +the 'x-' name prefix. This indicates that the command is not guaranteed to be
> +long term stable / liable to change in future and is not following QAPI 
> design
> +best practices. An example where this approach is taken is the QMP command
> +"x-query-registers". This returns a printf formatted dump of the architecture
> +specific CPU state. The way the data is formatted varies across QEMU targets,
> +is liable to change over time, and is only intended to be consumed as an 
> opaque
> +string by machines.
> +
>  User Defined Types
>  ~~
>  
> -- 
> 2.31.1
>

Reviewed-by: Eric Blake 


-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [PATCH] hw/ssi: imx_spi: Improve chip select handling

2021-09-08 Thread Guenter Roeck


On 9/8/21 2:05 AM, Cheng, Xuzhou wrote:

Thanks Bin added me into this loop.

Hi, Guenter

I am interested in your patch and the issue what you found. I want to reproduce 
your issue on Linux, but I failed, the spi-nor of sabrelite on Linux does not 
work.

Could you share your Linux kernel version? It would be great if you can share 
the detailed steps to reproduce.

My test record:
Linux version: https://github.com/torvalds/linux, the last commit is 
ac08b1c68d1b1ed3cebb218fc3ea2c07484eb07d.
Linux configuration: imx_v6_v7_defconfig.

QEMU command:
qemu-system-arm -nographic -M sabrelite -smp 4 -m 1G -serial null -serial /dev/pts/50 
-kernel zImage -initrd rootfs.ext4 -append "root=/dev/ram rdinit=sbin/init" 
-dtb imx6q-sabrelite.dtb -net nic -net tap,ifname=tap_xcheng,script=no -drive 
file=flash.sabre,format=raw,if=mtd

Logs: there are same logs when I apply your patch or not apply. So I don't 
understand what this patch fixes, maybe I missed something? :(...

# cat /proc/mtd
dev:size   erasesize  name
mtd0: 0020 1000 "spi0.0"
# ls /dev/mtd*
/dev/mtd0   /dev/mtd0ro /dev/mtdblock0
# echo "01234567899876543210" > test
# dd if=test of=/dev/mtd0  /* flash.sabre is empty */
0+1 records in
0+1 records out
# dd if=/dev/mtd0 of=test_out
4096+0 records in
4096+0 records out
# cat test_out /* test_out is empty */



It is a flash. I don't think dd erases the flash, so unless your flash.sabre
is all-ones you probably won't see the overwritten data.

Guenter

Re: [RFC v3 13/32] rust: use vendored-sources

2021-09-08 Thread Ian Jackson

Marc-André Lureau writes ("Re: [RFC v3 13/32] rust: use vendored-sources"):
> Yes, this is the shim to provide a C ABI QMP handler from Rust. This is where
> all the FFI<->Rust conversion takes place.
> 
> The "safe" code is qga/qmp/vcpus.rs. However, there is no
> documentation there, since it's not meant to be the public
> interface. It's documented with the QAPI schema.

Right, thanks.  That does look like a PoC of a Rust API.  I wanted the
rustdoc output because I find it provides a very uniform and readable
presentation even of an API with no doc comments.

I think maybe a thing I am missing is how you expect this to be used.
Which parts of the system are going to be in Rust.  etc.
And that would help explain what "public" means.

I think the answer is probably in this example:

https://patchew.org/QEMU/20210907121943.3498701-1-marcandre.lur...@redhat.com/20210907121943.3498701-30-marcandre.lur...@redhat.com/

but although my C and Rust are both fine, I don't understand qemu well
enough to make sense of it.

... wait, qga is "qemu guest agent" ?

I think I am sort of seeing this use case now.  But presuambly there
are other use cases for this QMP/QAPI type bridge stuff.

Sorry to be asking such stupid questions.

Thanks,
Ian.

Re: Compatibility between -device sga and -machine graphics=off

2021-09-08 Thread Daniel P . Berrangé

On Wed, Sep 08, 2021 at 06:28:01PM +0200, Paolo Bonzini wrote:
> On 08/09/21 18:08, Daniel P. Berrangé wrote:
> > Despite this difference, I was able migrate from a x86 guest using SGA
> > to a guest using graphics=off without errors being reported. So it
> > doesn't seem to change the migration data sections sent on the wire
> > at least.
> 
> It would probably break with Windows XP/2003 guests on AMD processors, as
> those are the ones that use kvmvapic.  On other guests and processors, it's
> a no-op.

After running an incoming migrate, the target QEMU's  'info mtree'
changed to reflect what the src QEMU originally reported.  So whatever
difference 'graphics=off' causes compared to -device sga initially,
appears to have been eliminated by running the migrate. Not sure if
that's true in the general case or not - I'm just testing with a
minimalist CLI and no real OS running

 qemu-system-x86_64 -nodefaults -display sdl -device VGA -serial stdio  -device 
sga

vs

 qemu-system-x86_64 -nodefaults -display sdl -device VGA -serial stdio  
-machine graphics=off

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [RFC v3 13/32] rust: use vendored-sources

2021-09-08 Thread Marc-André Lureau

Hi

On Wed, Sep 8, 2021 at 8:29 PM Ian Jackson  wrote:

> Marc-André Lureau writes ("Re: [RFC v3 13/32] rust: use vendored-sources"):
> > You can start by reading `cargo doc -p common --open`. The generated
> > code needs some environment variables set, so `cargo doc -p qga`
> > will fail unless you set the environment variable
> >
> > MESON_BUILD_ROOT=`pwd` cargo doc -p qga --open --document-private-items
>
> Thanks.  I did this (and your rune from bofere) and I have the docs
> open.
>
> I wasn't quite sure where to start.  I didn't see where the
> entrypoints were.  I did find
>
>  .../target/doc/qga/qmp/fn.qmp_guest_set_vcpus.html
>
> which err, doesn't look like the kind of safe api I was hoping to
> find.
>

Yes, this is the shim to provide a C ABI QMP handler from Rust. This is
where all the FFI<->Rust conversion takes place.

The "safe" code is qga/qmp/vcpus.rs. However, there is no documentation
there, since it's not meant to be the public interface. It's documented
with the QAPI schema.

-- 
Marc-André Lureau

Re: Compatibility between -device sga and -machine graphics=off

2021-09-08 Thread Richard W.M. Jones



Just commenting from the libguestfs POV:

We don't care about migration, and:

On Wed, Sep 08, 2021 at 05:08:08PM +0100, Daniel P. Berrangé wrote:
> On non-x86 emulators I see graphics=off has semantic effects beyond
> just controlling whether the firmware prints to the serial or not
> though. IOW it is overloaded to do multiple jobs, while -device sga
> only did one specific job. This makes graphics=off somewhat undesirable
> to use.  We're possibly lucky in this specific case though, because
> the 'sgabios.bin' ROM is x86 asm code, so was never valid to use in
> the non-x86 case.

we only enable  on i686 & x86-64.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
libguestfs lets you edit virtual machines.  Supports shell scripting,
bindings from many languages.  http://libguestfs.org

Re: [PATCH v4 05/12] job: @force parameter for job_cancel_sync()

2021-09-08 Thread Vladimir Sementsov-Ogievskiy


07.09.2021 15:42, Hanna Reitz wrote:

Callers should be able to specify whether they want job_cancel_sync() to
force-cancel the job or not.

In fact, almost all invocations do not care about consistency of the
result and just want the job to terminate as soon as possible, so they
should pass force=true.  The replication block driver is the exception,
specifically the active commit job it runs.

As for job_cancel_sync_all(), all callers want it to force-cancel all
jobs, because that is the point of it: To cancel all remaining jobs as
quickly as possible (generally on process termination).  So make it
invoke job_cancel_sync() with force=true.

This changes some iotest outputs, because quitting qemu while a mirror
job is active will now lead to it being cancelled instead of completed,
which is what we want.  (Cancelling a READY mirror job with force=false
may take an indefinite amount of time, which we do not want when
quitting.  If users want consistent results, they must have all jobs be
done before they quit qemu.)

Buglink:https://gitlab.com/qemu-project/qemu/-/issues/462
Signed-off-by: Hanna Reitz


Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir

Re: [RFC v3 13/32] rust: use vendored-sources

2021-09-08 Thread Ian Jackson

Marc-André Lureau writes ("Re: [RFC v3 13/32] rust: use vendored-sources"):
> You can start by reading `cargo doc -p common --open`. The generated
> code needs some environment variables set, so `cargo doc -p qga`
> will fail unless you set the environment variable
> 
> MESON_BUILD_ROOT=`pwd` cargo doc -p qga --open --document-private-items

Thanks.  I did this (and your rune from bofere) and I have the docs
open.

I wasn't quite sure where to start.  I didn't see where the
entrypoints were.  I did find

 .../target/doc/qga/qmp/fn.qmp_guest_set_vcpus.html

which err, doesn't look like the kind of safe api I was hoping to
find.

Ian.

Re: Compatibility between -device sga and -machine graphics=off

2021-09-08 Thread Paolo Bonzini


On 08/09/21 18:08, Daniel P. Berrangé wrote:

Despite this difference, I was able migrate from a x86 guest using SGA
to a guest using graphics=off without errors being reported. So it
doesn't seem to change the migration data sections sent on the wire
at least.


It would probably break with Windows XP/2003 guests on AMD processors, 
as those are the ones that use kvmvapic.  On other guests and 
processors, it's a no-op.


Paolo

Re: [PATCH] hw/i386/acpi-build: adjust q35 IO addr range for acpi pci hotplug

2021-09-08 Thread Ani Sinha



On Wed, 8 Sep 2021, Philippe Mathieu-Daudé wrote:

> On 9/8/21 10:43 AM, Igor Mammedov wrote:
> > On Wed, 8 Sep 2021 12:51:04 +0530 (IST)
> > Ani Sinha  wrote:
> >
> >> On Wed, 8 Sep 2021, Igor Mammedov wrote:
> >>
> >>> On Wed,  8 Sep 2021 09:41:39 +0530
> >>> Ani Sinha  wrote:
> >>>
>  Change caf108bc58790 ("hw/i386/acpi-build: Add ACPI PCI hot-plug methods 
>  to Q35")
>  selects an IO address range for acpi based PCI hotplug for q35 
>  arbitrarily. It
>  starts at address 0x0cc4 and ends at 0x0cdb. It was assumed that this 
>  address
>  range was free and available. However, upon more testing, it seems this 
>  address
>  range to be not available for some latest versions of windows.
> >>>
> >>> The range is something assigned by QEMU, and guest has no say where it 
> >>> should be.
> >>> but perhaps we failed to describe it properly or something similar, so 
> >>> one gets
> >>> 'no resource' error.
> >>
> >> OK dug deeper. The existing range of IO address conflicts with the CPU
> >> hotplug range.
> >>
> >> CPU hotplug range (ICH9_CPU_HOTPLUG_IO_BASE) is 0x0cd8 to 0x0ce3
> >>
> >> This intersects with range 0x0cc4 to 0x0cdb for ACPI_PCIHP_ADDR_ICH9 .
> >
> > Looking at 'info mtree' it's indeed wrong:
> >
> > 0cc4-0cdb (prio 0, i/o): acpi-pci-hotplug
> > 0cd8-0cf7 (prio 0, i/o): acpi-cpu-hotplug
> >
> > which of them eventually handles IO request in intersection range?
>
> (qemu) info mtree -f
> FlatView #0
>  AS "I/O", root: io
>  Root memory region: io
>   0cc4-0cd7 (prio 0, i/o): acpi-pci-hotplug
>   0cd8-0cf7 (prio 0, i/o): acpi-cpu-hotplug
>
> >
> > Please, add to commit message your findings, so it would point out
> > where problem comes from and what it breaks(doesn't work as expect).
> >
> > Given it's broken to begin with (and possibly regression if it broke cpu 
> > hotplug),

right. I did some foresic analysis on this. So the value 0x0cc4 comes from
the original RFC patch unchanged that Julia posted:

https://patchew.org/QEMU/20200924070013.165026-1-jus...@redhat.com/20200924070013.165026-3-jus...@redhat.com/

Meanwhile, between the time that RFC patch was posted and when it was
actually pushed, you made the following change:
b32bd763a1ca9296 ("pci: introduce acpi-index property for PCI device")

This change did this:

-#define ACPI_PCIHP_SIZE 0x0014
+#define ACPI_PCIHP_SIZE 0x0018

So now the IO address ranges are no longer mutually exclusive :-)

Re: [RFC v3 13/32] rust: use vendored-sources

2021-09-08 Thread Ian Jackson

Marc-André Lureau writes ("Re: [RFC v3 13/32] rust: use vendored-sources"):
> On Wed, Sep 8, 2021 at 7:55 PM Ian Jackson  wrote:
>  >   git submodules are just awful IMO.
> 
> Yes, but it's often (always?) the user fault.

I must disagree in the strongest possible terms.  I don't think I can
express my feelings on this in a way that would be appropriate in this
context.

Anyway...

My trickery as described above (run configure, edit the "replace-with"
out of .cargo/config.toml, run make) did produce a build.

But to review the internal API I want the rustdoc output.

How can I run rustdoc in a way that will work ?  I tried "cargo doc"
and it complained about a lack of "MESON_BUILD_ROOT".  I guessed and
ran
  MESON_BUILD_ROOT=$PWD cargo doc
which seemed to produce some output and complete but I can't find the
results anywhere.

Can you please give me the set of runes to type view the rustdoc-built
API documentation for the qemu-internal Rust APIs ?

Thanks,
Ian.

Re: [RFC v3 13/32] rust: use vendored-sources

2021-09-08 Thread Peter Maydell

On Wed, 8 Sept 2021 at 17:17, Marc-André Lureau
 wrote:
>
> Hi
>
> On Wed, Sep 8, 2021 at 7:55 PM Ian Jackson  wrote:
>>
>> Marc-André Lureau writes ("Re: [RFC v3 13/32] rust: use vendored-sources"):
>> > Hmm, I do "cargo vendor --versioned-dirs ../rust/vendored" to vendor 
>> > crates.
>> >
>> > It seems cc was updated, and I didn't update the submodule accordingly. For
>> > reference, this is the dependency tree that WFM:
>>
>> git submodules are just awful IMO.
>
>
> Yes, but it's often (always?) the user fault.

I tend to agree with Ian -- submodules are badly designed, and
have lots of sharp edges that it's easy to cut yourself on.
Yes, you can say "well, the user should have held it by the other handle
because that one isn't fitted with the spring-loaded razorblades", but I
would argue that fault is better placed at the door of the designer in
that kind of situation...

-- PMM

Re: [PATCH v4 05/12] job: @force parameter for job_cancel_sync()

2021-09-08 Thread Eric Blake

On Tue, Sep 07, 2021 at 02:42:38PM +0200, Hanna Reitz wrote:
> Callers should be able to specify whether they want job_cancel_sync() to
> force-cancel the job or not.
> 
> In fact, almost all invocations do not care about consistency of the
> result and just want the job to terminate as soon as possible, so they
> should pass force=true.  The replication block driver is the exception,
> specifically the active commit job it runs.
> 
> As for job_cancel_sync_all(), all callers want it to force-cancel all
> jobs, because that is the point of it: To cancel all remaining jobs as
> quickly as possible (generally on process termination).  So make it
> invoke job_cancel_sync() with force=true.
> 
> This changes some iotest outputs, because quitting qemu while a mirror
> job is active will now lead to it being cancelled instead of completed,
> which is what we want.  (Cancelling a READY mirror job with force=false
> may take an indefinite amount of time, which we do not want when
> quitting.  If users want consistent results, they must have all jobs be
> done before they quit qemu.)
> 
> Buglink: https://gitlab.com/qemu-project/qemu/-/issues/462
> Signed-off-by: Hanna Reitz 
> ---
>  include/qemu/job.h| 10 ++---
>  block/replication.c   |  4 +-
>  blockdev.c|  4 +-
>  job.c | 18 ++--
>  tests/unit/test-blockjob.c|  2 +-
>  tests/qemu-iotests/109.out| 60 +++
>  tests/qemu-iotests/tests/qsd-jobs.out |  2 +-
>  7 files changed, 50 insertions(+), 50 deletions(-)
> 

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [RFC v3 13/32] rust: use vendored-sources

2021-09-08 Thread Marc-André Lureau

Hi

On Wed, Sep 8, 2021 at 7:40 PM Ian Jackson  wrote:

> marcandre.lur...@redhat.com writes ("[RFC v3 13/32] rust: use
> vendored-sources"):
> > Most likely, QEMU will want tighter control over the sources, rather
> > than relying on crates.io downloading, use a git submodule with all the
> > dependencies. However, cargo --offline was added in 1.36.
>
> Hi.
>
> pm215 pointed me at this, as I have some background in Rust.
> I definitely approve of having Rust in Qemu.  I don't have an opinion
> about whether the sources should be vendored this way.
>
> But, I tried to build this, and
>
> error: failed to select a version for the requirement `cc = "=1.0.70"`
> candidate versions found which didn't match: 1.0.69
> location searched: directory source
> `/volatile/rustcargo/Rustup/Qemu/qemu.pwt/rust/vendored` (which is
> replacing registry `crates-io`)
> required by package `nix v0.20.1`
> ... which is depended on by `qga v0.1.0
> (/volatile/rustcargo/Rustup/Qemu/qemu.pwt/qga)`
> perhaps a crate was updated and forgotten to be re-vendored?
> As a reminder, you're using offline mode (--offline) which can
> sometimes cause surprising resolution failures, if this error is too
> confusing you may wish to retry without the offline flag.
>
> I think the most important part here is to get the general APIs,
> presented to general Rust code in Qemu, right.  So I wanted to review
> those via the output from rustdoc.
>

You can start by reading `cargo doc -p common --open`. The generated code
needs some environment variables set, so `cargo doc -p qga` will fail
unless you set the environment variable

MESON_BUILD_ROOT=`pwd` cargo doc -p qga --open --document-private-items

works, but the QAPI types aren't documented, so this is a bit useless at
this point. I wonder if I could put the schema doc, hmm...


> I tried commenting out the `replace-with` in .cargo/config.toml
> but evidently the systme isn't intended to be used that way.
>
> Ian.
>
>

-- 
Marc-André Lureau

Re: [PATCH 0/5] Stop adding HMP-only commands, allow QMP for all

2021-09-08 Thread Philippe Mathieu-Daudé

On 9/8/21 5:09 PM, Markus Armbruster wrote:
> Daniel P. Berrangé  writes:
> 
>> We are still adding HMP commands without any QMP counterparts. This is
>> done because there are a reasonable number of scenarios where the cost
>> of designing a QAPI data type for the command is not justified.
>>
>> This has the downside, however, that we will never be able to fully
>> isolate the monitor code from the remainder of QEMU internals. It is
>> desirable to be able to get to a point where subsystems in QEMU are
>> exclusively implemented using QAPI types and never need to have any
>> knowledge of the monitor APIs.
>>
>> The way to get there is to stop adding commands to HMP only. All
>> commands must be implemented using QMP, and any HMP implementation
>> be a shim around the QMP implementation.
>>
>> We don't want to compromise our supportability of QMP long term though.
>>
>> This series proposes that we relax our requirements around fine grained
>> QAPI data design,
> 
> Specifics?  QMP command returns a string, HMP wrapper prints that
> string?
> 
>>   but with the caveat that any command taking this
>> design approach is mandated to use the 'x-' name prefix.
>>
>> This tradeoff should be suitable for any commands we have been adding
>> exclusively to HMP in recent times, and thus mean we have mandate QMP
>> support for all new commands going forward.
>>
>> This series illustrates the concept by converting the "info registers"
>> HMP to invoke a new 'x-query-registers' QMP command. Note that only
>> the i386 CPU target is converted to work with this new approach, so
>> this series needs to be considered incomplete. If we go forward with
>> this idea, then a subsequent version of this series would need to
>> obviously convert all other CPU targets.
>>
>> After doing that conversion the only use of qemu_fprintf() would be
>> the disas.c file. Remaining uses of qemu_fprintf and qemu_printf
>> could be tackled in a similar way and eventually eliminate the need
>> for any of these printf wrappers in QEMU.
>>
>> NB: I added docs to devel/writing-qmp-commands.rst about the two
>> design approaches to QMP. I didn't see another good place to put
>> an explicit note that we will not add any more HMP-only commands.
>> Obviously HMP/QMP maintainers control this in their reviews of
>> patches, and maybe that's sufficient ?
> 
> We could add devel/writing-hmp-commands.rst, or go with a single
> document devel/writing-monitor-commands.rst.
> 
>> NB: if we take this approach we'll want to figure out how many
>> HMP-only commands we actually have left and then perhaps have
>> HMP-only commands we actually have left
> 
> Yes.
> 
> For many HMP commands, a QMP commands with the same name exists, and the
> former is probably a wrapper around the latter.  Same for HMP "info FOO"
> and QMP query-FOO.
> 
> HMP commands without such a match:

(1) Handy HMP commands while debugging:

- help
- info *
- i/o
- loadvm/savevm
- trace-event/trace-file
- wavcapture/stopcapture
- xp

Eventually also:

- hostfwd_add/hostfwd_remove
- log
- logfile
- print
- sendkey

(2) I suppose these are pre-QMP and wonder about their
usefulness in the monitor (I'd certainly use a QMP
equivalent to test).

- migrate_set_capability
- migrate_set_parameter
- migration_mode
- mouse_button
- mouse_move
- mouse_set
- nmi
- pcie_aer_inject_error
- exit_preconfig
- singlestep
- snapshot_blkdev_internal
- snapshot_delete_blkdev_internal

(3) I don't use them, I expect them to belong
in either (1) or (2).

> boot_set
> change
> commit
> cpu
> delvm
> drive_add
> drive_del
> gdbserver
> gpa2hpa
> gpa2hva
> gva2gpa
> mce
> qemu-io
> snapshot_blkdev
> sum
> sync-profile
> watchdog_action

> 
> This is 77 out of 170 HMP commands.  I was hoping for fewer.

Re: [RFC v3 13/32] rust: use vendored-sources

2021-09-08 Thread Marc-André Lureau

Hi

On Wed, Sep 8, 2021 at 7:55 PM Ian Jackson  wrote:

> Marc-André Lureau writes ("Re: [RFC v3 13/32] rust: use vendored-sources"):
> > Hmm, I do "cargo vendor --versioned-dirs ../rust/vendored" to vendor
> crates.
> >
> > It seems cc was updated, and I didn't update the submodule accordingly.
> For
> > reference, this is the dependency tree that WFM:
>
> git submodules are just awful IMO.
>

Yes, but it's often (always?) the user fault. CI should help, when it will
check Rust code.


> > $ cargo tree -p qga
> > qga v0.1.0 (/home/elmarco/src/qemu/qga)
> > ├── common v0.1.0 (/home/elmarco/src/qemu/rust/common)
> > │   ├── libc v0.2.101
> > │   └── nix v0.20.1
> > │   ├── bitflags v1.2.1
> > │   ├── cfg-if v1.0.0
> > │   ├── libc v0.2.101
> > │   └── memoffset v0.6.4
> > │   [build-dependencies]
> > │   └── autocfg v1.0.1
> > ├── hostname v0.3.1
> > │   ├── libc v0.2.101
> > │   └── match_cfg v0.1.0
> > └── nix v0.20.1 (*)
>
> With the .config/cargo.toml "replace-with" commented out, I see this:
>
> rustcargo@zealot:~/Rustup/Qemu/qemu.pwt/build$ cargo tree -p qga
> qga v0.1.0 (/volatile/rustcargo/Rustup/Qemu/qemu.pwt/qga)
> ├── common v0.1.0 (/volatile/rustcargo/Rustup/Qemu/qemu.pwt/rust/common)
> │   ├── libc v0.2.101
> │   └── nix v0.20.1
> │   ├── bitflags v1.2.1
> │   ├── cfg-if v1.0.0
> │   ├── libc v0.2.101
> │   └── memoffset v0.6.4
> │   [build-dependencies]
> │   └── autocfg v1.0.1
> ├── hostname v0.3.1
> │   ├── libc v0.2.101
> │   └── match_cfg v0.1.0
> └── nix v0.20.1 (*)
> rustcargo@zealot:~/Rustup/Qemu/qemu.pwt/build$
>
> Which is the same as yours.  Although "cargo build" doesn't work
> build, guessed from the messagese that perhaps this was the automatic
> codegen hadn't run.  I'm now trying "make" and and it seems to be
> running.
>
> With the "replace-with" uncommented, cargo tree bombs out.  I'm afraid
> I haven't used cargo vendor so I'm not sure if I am going in the right
> direction with this workaround.  Hopefully it will finish the build.
>
> Would it be possible to have a configure option to use unvendored
> upstream Rust libraries from crates.io ?
>

Not easily, but we could have a --disable-rust-offline configure option.
Whether this is desirable, I am not sure.


-- 
Marc-André Lureau

Compatibility between -device sga and -machine graphics=off

2021-09-08 Thread Daniel P . Berrangé

Given the libvirt XML snippet

  
...

...
  

libvirt QEMU driver will always format

  -device sga

Libguestfs uses this syntax, so we need to make sure it still works
in future even if 'sga' is disabled or removed in a QEMU build in
favour of SeaBIOS' built-in support.


There are the following combinations that matter

 1. Graphical display only, no serial port, BIOS to graphical display

 
   
 

   QEMU cli

 -device VGA


 2. Serial port only, no graphics, BIOS to nowhere

 

   QEMU cli

 -serial pty


 3. Serial port only, no graphics, BIOS to serial

 
   
 
 

   QEMU cli

 -serial pty -device sga
 -serial pty -M graphics=off


 4. Graphical display, serial port, BIOS only to graphical display

 
   
 
 

   QEMU cli

 -device VGA -serial pty


 5. Graphical display, serial port, BIOS to graphical display + serial

 
   
 
 
   
 
 

   QEMU cli

 -device VGA -serial pty -device sga
 -device VGA -serial pty -M graphics=off



The cases 3 and 5 appear to have the same effect on output with x86
for SGA and graphics=off, which is good.

If I do 'info mtree' though, I do see a different memory layout
when changing from SGA to graphics=off


@@ -18,7 +18,7 @@
 000c-000c3fff (prio 1, ram): alias pam-rom @pc.ram 
000c-000c3fff
 000c4000-000c7fff (prio 1, ram): alias pam-rom @pc.ram 
000c4000-000c7fff
 000c8000-000cbfff (prio 1, ram): alias pam-rom @pc.ram 
000c8000-000cbfff
-000cb000-000cdfff (prio 1000, ram): alias kvmvapic-rom 
@pc.ram 000cb000-000cdfff
+000ca000-000ccfff (prio 1000, ram): alias kvmvapic-rom 
@pc.ram 000ca000-000ccfff
 000cc000-000c (prio 1, ram): alias pam-rom @pc.ram 
000cc000-000c
 000d-000d3fff (prio 1, ram): alias pam-rom @pc.ram 
000d-000d3fff
 000d4000-000d7fff (prio 1, ram): alias pam-rom @pc.ram 
000d4000-000d7fff
@@ -109,7 +109,7 @@
 000c-000c3fff (prio 1, ram): alias pam-rom @pc.ram 
000c-000c3fff
 000c4000-000c7fff (prio 1, ram): alias pam-rom @pc.ram 
000c4000-000c7fff
 000c8000-000cbfff (prio 1, ram): alias pam-rom @pc.ram 
000c8000-000cbfff
-000cb000-000cdfff (prio 1000, ram): alias kvmvapic-rom 
@pc.ram 000cb000-000cdfff
+000ca000-000ccfff (prio 1000, ram): alias kvmvapic-rom 
@pc.ram 000ca000-000ccfff
 000cc000-000c (prio 1, ram): alias pam-rom @pc.ram 
000cc000-000c
 000d-000d3fff (prio 1, ram): alias pam-rom @pc.ram 
000d-000d3fff
 000d4000-000d7fff (prio 1, ram): alias pam-rom @pc.ram 
000d4000-000d7fff
@@ -185,7 +185,7 @@
 000c-000c3fff (prio 1, ram): alias pam-rom @pc.ram 
000c-000c3fff
 000c4000-000c7fff (prio 1, ram): alias pam-rom @pc.ram 
000c4000-000c7fff
 000c8000-000cbfff (prio 1, ram): alias pam-rom @pc.ram 
000c8000-000cbfff
-000cb000-000cdfff (prio 1000, ram): alias kvmvapic-rom 
@pc.ram 000cb000-000cdfff
+000ca000-000ccfff (prio 1000, ram): alias kvmvapic-rom 
@pc.ram 000ca000-000ccfff
 000cc000-000c (prio 1, ram): alias pam-rom @pc.ram 
000cc000-000c
 000d-000d3fff (prio 1, ram): alias pam-rom @pc.ram 
000d-000d3fff
 000d4000-000d7fff (prio 1, ram): alias pam-rom @pc.ram 
000d4000-000d7fff


Despite this difference, I was able migrate from a x86 guest using SGA
to a guest using graphics=off without errors being reported. So it
doesn't seem to change the migration data sections sent on the wire
at least.

With this in mind, is the different memory layout harmless from the
POV of migration, or is it none the less going to cause subtle
problems for the guest at some point in future ?


On non-x86 emulators I see graphics=off has semantic effects beyond
just controlling whether the firmware prints to the serial or not
though. IOW it is overloaded to do multiple jobs, while -device sga
only did one specific job. This makes graphics=off somewhat undesirable
to use.  We're possibly lucky in this specific case though, because
the 'sgabios.bin' ROM is x86 asm code, so was never valid to use in
the non-x86 case.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|:

Re: [PATCH v2 3/9] linux-user: Split signal-related prototypes into signal-common.h

2021-09-08 Thread Philippe Mathieu-Daudé

On 9/8/21 5:43 PM, Peter Maydell wrote:
> Split the signal related prototypes into the existing header file
> signal-common.h, and include it in those places that now require it.
> 
> Signed-off-by: Peter Maydell 
> ---
> v1->v2: use existing signal-common.h instead of new header
> ---
>  linux-user/qemu.h| 36 
>  linux-user/signal-common.h   | 36 
>  linux-user/aarch64/cpu_loop.c|  1 +
>  linux-user/alpha/cpu_loop.c  |  1 +
>  linux-user/arm/cpu_loop.c|  1 +
>  linux-user/cris/cpu_loop.c   |  1 +
>  linux-user/fd-trans.c|  1 +
>  linux-user/hexagon/cpu_loop.c|  1 +
>  linux-user/hppa/cpu_loop.c   |  1 +
>  linux-user/i386/cpu_loop.c   |  1 +
>  linux-user/m68k/cpu_loop.c   |  1 +
>  linux-user/main.c|  1 +
>  linux-user/microblaze/cpu_loop.c |  1 +
>  linux-user/mips/cpu_loop.c   |  1 +
>  linux-user/nios2/cpu_loop.c  |  1 +
>  linux-user/openrisc/cpu_loop.c   |  1 +
>  linux-user/ppc/cpu_loop.c|  1 +
>  linux-user/riscv/cpu_loop.c  |  1 +
>  linux-user/s390x/cpu_loop.c  |  1 +
>  linux-user/sh4/cpu_loop.c|  1 +
>  linux-user/sparc/cpu_loop.c  |  1 +
>  linux-user/syscall.c |  1 +
>  linux-user/xtensa/cpu_loop.c |  1 +
>  23 files changed, 57 insertions(+), 36 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v2 5/9] linux-user: Split mmap prototypes into user-mmap.h

2021-09-08 Thread Philippe Mathieu-Daudé

On 9/8/21 5:44 PM, Peter Maydell wrote:
> Split out the mmap prototypes into a new header user-mmap.h
> which we only include where required.
> 
> Signed-off-by: Peter Maydell 
> ---
>  linux-user/qemu.h  | 14 --
>  linux-user/user-mmap.h | 34 ++
>  linux-user/elfload.c   |  1 +
>  linux-user/flatload.c  |  1 +
>  linux-user/i386/cpu_loop.c |  1 +
>  linux-user/main.c  |  1 +
>  linux-user/mmap.c  |  1 +
>  linux-user/syscall.c   |  1 +
>  8 files changed, 40 insertions(+), 14 deletions(-)
>  create mode 100644 linux-user/user-mmap.h

Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v2 4/9] linux-user: Split loader-related prototypes into loader.h

2021-09-08 Thread Philippe Mathieu-Daudé

On 9/8/21 5:44 PM, Peter Maydell wrote:
> Split guest-binary loader prototypes out into a new header
> loader.h which we include only where required.
> 
> Signed-off-by: Peter Maydell 
> ---
>  linux-user/loader.h| 59 ++
>  linux-user/qemu.h  | 40 
>  linux-user/elfload.c   |  1 +
>  linux-user/flatload.c  |  1 +
>  linux-user/linuxload.c |  1 +
>  linux-user/main.c  |  1 +
>  linux-user/signal.c|  1 +
>  linux-user/syscall.c   |  1 +
>  8 files changed, 65 insertions(+), 40 deletions(-)
>  create mode 100644 linux-user/loader.h
> 
> diff --git a/linux-user/loader.h b/linux-user/loader.h

> +void do_init_thread(struct target_pt_regs *regs, struct image_info *infop);
> +abi_ulong loader_build_argptr(int envc, int argc, abi_ulong sp,
> +  abi_ulong stringp, int push_ptr);
> +int loader_exec(int fdexec, const char *filename, char **argv, char **envp,
> + struct target_pt_regs *regs, struct image_info *infop,
> + struct linux_binprm *);

Pre-existing invalid style alignment, otherwise:
Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH RFC server v2 05/11] vfio-user: run vfio-user context

2021-09-08 Thread Stefan Hajnoczi

On Wed, Sep 08, 2021 at 03:21:19PM +, John Levon wrote:
> On Wed, Sep 08, 2021 at 04:02:22PM +0100, Stefan Hajnoczi wrote:
> 
> > > We'd have to have a whole separate API to do that, so a separate thread 
> > > seems a
> > > better approach?
> > 
> > Whether to support non-blocking properly in libvfio-user is a decision
> > for you. If libvfio-user doesn't support non-blocking, then QEMU should
> > run a dedicated thread instead of the partially non-blocking approach in
> > this patch.
> 
> Right, sure. At this point we don't have any plans to implement a separate 
> async
> API due to the amount of work involved. 
> 
> > A non-blocking approach is nice when there are many devices hosted in a
> > single process or a lot of async replies (which requires extra thread
> > synchronization with the blocking approach).
> 
> I suppose this would be more of a problem with devices where the I/O path has 
> to
> be handled via the socket.

Yes, exactly. I think it shouldn't be a problem when shared memory is
used and the irqfd (eventfd) mechanism is used for IRQs. It becomes slow
when there's no shared memory or if raising IRQs requires protocol
messages.

Stefan


signature.asc
Description: PGP signature

Re: [RFC v3 13/32] rust: use vendored-sources

2021-09-08 Thread Ian Jackson

Marc-André Lureau writes ("Re: [RFC v3 13/32] rust: use vendored-sources"):
> Hmm, I do "cargo vendor --versioned-dirs ../rust/vendored" to vendor crates.
> 
> It seems cc was updated, and I didn't update the submodule accordingly. For
> reference, this is the dependency tree that WFM:

git submodules are just awful IMO.

> $ cargo tree -p qga                            
> qga v0.1.0 (/home/elmarco/src/qemu/qga)
> ├── common v0.1.0 (/home/elmarco/src/qemu/rust/common)
> │   ├── libc v0.2.101
> │   └── nix v0.20.1
> │       ├── bitflags v1.2.1
> │       ├── cfg-if v1.0.0
> │       ├── libc v0.2.101
> │       └── memoffset v0.6.4
> │           [build-dependencies]
> │           └── autocfg v1.0.1
> ├── hostname v0.3.1
> │   ├── libc v0.2.101
> │   └── match_cfg v0.1.0
> └── nix v0.20.1 (*)

With the .config/cargo.toml "replace-with" commented out, I see this:

rustcargo@zealot:~/Rustup/Qemu/qemu.pwt/build$ cargo tree -p qga
qga v0.1.0 (/volatile/rustcargo/Rustup/Qemu/qemu.pwt/qga)
├── common v0.1.0 (/volatile/rustcargo/Rustup/Qemu/qemu.pwt/rust/common)
│   ├── libc v0.2.101
│   └── nix v0.20.1
│   ├── bitflags v1.2.1
│   ├── cfg-if v1.0.0
│   ├── libc v0.2.101
│   └── memoffset v0.6.4
│   [build-dependencies]
│   └── autocfg v1.0.1
├── hostname v0.3.1
│   ├── libc v0.2.101
│   └── match_cfg v0.1.0
└── nix v0.20.1 (*)
rustcargo@zealot:~/Rustup/Qemu/qemu.pwt/build$ 

Which is the same as yours.  Although "cargo build" doesn't work
build, guessed from the messagese that perhaps this was the automatic
codegen hadn't run.  I'm now trying "make" and and it seems to be
running.

With the "replace-with" uncommented, cargo tree bombs out.  I'm afraid
I haven't used cargo vendor so I'm not sure if I am going in the right
direction with this workaround.  Hopefully it will finish the build.

Would it be possible to have a configure option to use unvendored
upstream Rust libraries from crates.io ?

Ian.

Re: [RFC v3 13/32] rust: use vendored-sources

2021-09-08 Thread Marc-André Lureau

Hi

On Wed, Sep 8, 2021 at 7:40 PM Ian Jackson  wrote:

> marcandre.lur...@redhat.com writes ("[RFC v3 13/32] rust: use
> vendored-sources"):
> > Most likely, QEMU will want tighter control over the sources, rather
> > than relying on crates.io downloading, use a git submodule with all the
> > dependencies. However, cargo --offline was added in 1.36.
>
> Hi.
>
> pm215 pointed me at this, as I have some background in Rust.
> I definitely approve of having Rust in Qemu.  I don't have an opinion
> about whether the sources should be vendored this way.
>
> But, I tried to build this, and
>
> error: failed to select a version for the requirement `cc = "=1.0.70"`
> candidate versions found which didn't match: 1.0.69
> location searched: directory source
> `/volatile/rustcargo/Rustup/Qemu/qemu.pwt/rust/vendored` (which is
> replacing registry `crates-io`)
> required by package `nix v0.20.1`
> ... which is depended on by `qga v0.1.0
> (/volatile/rustcargo/Rustup/Qemu/qemu.pwt/qga)`
> perhaps a crate was updated and forgotten to be re-vendored?
> As a reminder, you're using offline mode (--offline) which can
> sometimes cause surprising resolution failures, if this error is too
> confusing you may wish to retry without the offline flag.
>
> I think the most important part here is to get the general APIs,
> presented to general Rust code in Qemu, right.  So I wanted to review
> those via the output from rustdoc.
>
> I tried commenting out the `replace-with` in .cargo/config.toml
> but evidently the systme isn't intended to be used that way.
>
> Ian.
>
>
Hmm, I do "cargo vendor --versioned-dirs ../rust/vendored" to vendor crates.

It seems cc was updated, and I didn't update the submodule accordingly. For
reference, this is the dependency tree that WFM:

$ cargo tree -p qga
qga v0.1.0 (/home/elmarco/src/qemu/qga)
├── common v0.1.0 (/home/elmarco/src/qemu/rust/common)
│   ├── libc v0.2.101
│   └── nix v0.20.1
│   ├── bitflags v1.2.1
│   ├── cfg-if v1.0.0
│   ├── libc v0.2.101
│   └── memoffset v0.6.4
│   [build-dependencies]
│   └── autocfg v1.0.1
├── hostname v0.3.1
│   ├── libc v0.2.101
│   └── match_cfg v0.1.0
└── nix v0.20.1 (*)


-- 
Marc-André Lureau

[PATCH v2 9/9] linux-user: Drop unneeded includes from qemu.h

2021-09-08 Thread Peter Maydell

Trim down the #includes in qemu.h where we can, either by
dropping unneeded headers or by moving them to user-internals.h.

This includes deleting a couple of #includes that appear at
weird points midway through the header file.

Signed-off-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
---
 linux-user/qemu.h   | 4 
 linux-user/user-internals.h | 2 ++
 thunk.c | 1 +
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index fda90fc28d6..5c713fa8ab2 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -2,7 +2,6 @@
 #define QEMU_H
 
 #include "cpu.h"
-#include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 
 #undef DEBUG_REMAP
@@ -163,8 +162,6 @@ typedef struct TaskState {
 struct target_sigaltstack sigaltstack_used;
 } __attribute__((aligned(16))) TaskState;
 
-#include "qemu/log.h"
-
 abi_long do_brk(abi_ulong new_brk);
 
 /* user access */
@@ -349,5 +346,4 @@ void *lock_user_string(abi_ulong guest_addr);
 #define unlock_user_struct(host_ptr, guest_addr, copy) \
 unlock_user(host_ptr, guest_addr, (copy) ? sizeof(*host_ptr) : 0)
 
-#include 
 #endif /* QEMU_H */
diff --git a/linux-user/user-internals.h b/linux-user/user-internals.h
index 1729a8b62e1..661612a088b 100644
--- a/linux-user/user-internals.h
+++ b/linux-user/user-internals.h
@@ -20,6 +20,8 @@
 
 #include "hostdep.h"
 #include "exec/user/thunk.h"
+#include "exec/exec-all.h"
+#include "qemu/log.h"
 
 extern char *exec_path;
 void init_task_state(TaskState *ts);
diff --git a/thunk.c b/thunk.c
index fc5be1a502e..dac4bf11c65 100644
--- a/thunk.c
+++ b/thunk.c
@@ -17,6 +17,7 @@
  * License along with this library; if not, see .
  */
 #include "qemu/osdep.h"
+#include "qemu/log.h"
 
 #include "qemu.h"
 #include "exec/user/thunk.h"
-- 
2.20.1

[PATCH v2 4/9] linux-user: Split loader-related prototypes into loader.h

2021-09-08 Thread Peter Maydell

Split guest-binary loader prototypes out into a new header
loader.h which we include only where required.

Signed-off-by: Peter Maydell 
---
 linux-user/loader.h| 59 ++
 linux-user/qemu.h  | 40 
 linux-user/elfload.c   |  1 +
 linux-user/flatload.c  |  1 +
 linux-user/linuxload.c |  1 +
 linux-user/main.c  |  1 +
 linux-user/signal.c|  1 +
 linux-user/syscall.c   |  1 +
 8 files changed, 65 insertions(+), 40 deletions(-)
 create mode 100644 linux-user/loader.h

diff --git a/linux-user/loader.h b/linux-user/loader.h
new file mode 100644
index 000..f375ee0679b
--- /dev/null
+++ b/linux-user/loader.h
@@ -0,0 +1,59 @@
+/*
+ * loader.h: prototypes for linux-user guest binary loader
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see .
+ */
+
+#ifndef LINUX_USER_LOADER_H
+#define LINUX_USER_LOADER_H
+
+/*
+ * Read a good amount of data initially, to hopefully get all the
+ * program headers loaded.
+ */
+#define BPRM_BUF_SIZE  1024
+
+/*
+ * This structure is used to hold the arguments that are
+ * used when loading binaries.
+ */
+struct linux_binprm {
+char buf[BPRM_BUF_SIZE] __attribute__((aligned));
+abi_ulong p;
+int fd;
+int e_uid, e_gid;
+int argc, envc;
+char **argv;
+char **envp;
+char *filename;/* Name of binary */
+int (*core_dump)(int, const CPUArchState *); /* coredump routine */
+};
+
+void do_init_thread(struct target_pt_regs *regs, struct image_info *infop);
+abi_ulong loader_build_argptr(int envc, int argc, abi_ulong sp,
+  abi_ulong stringp, int push_ptr);
+int loader_exec(int fdexec, const char *filename, char **argv, char **envp,
+ struct target_pt_regs *regs, struct image_info *infop,
+ struct linux_binprm *);
+
+uint32_t get_elf_eflags(int fd);
+int load_elf_binary(struct linux_binprm *bprm, struct image_info *info);
+int load_flt_binary(struct linux_binprm *bprm, struct image_info *info);
+
+abi_long memcpy_to_target(abi_ulong dest, const void *src,
+  unsigned long len);
+
+extern unsigned long guest_stack_size;
+
+#endif /* LINUX_USER_LOADER_H */
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 76d3f5e7eb9..02c4778c970 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -173,30 +173,6 @@ void stop_all_tasks(void);
 extern const char *qemu_uname_release;
 extern unsigned long mmap_min_addr;
 
-/* ??? See if we can avoid exposing so much of the loader internals.  */
-
-/*
- * Read a good amount of data initially, to hopefully get all the
- * program headers loaded.
- */
-#define BPRM_BUF_SIZE  1024
-
-/*
- * This structure is used to hold the arguments that are
- * used when loading binaries.
- */
-struct linux_binprm {
-char buf[BPRM_BUF_SIZE] __attribute__((aligned));
-abi_ulong p;
-int fd;
-int e_uid, e_gid;
-int argc, envc;
-char **argv;
-char **envp;
-char *filename;/* Name of binary */
-int (*core_dump)(int, const CPUArchState *); /* coredump routine */
-};
-
 typedef struct IOCTLEntry IOCTLEntry;
 
 typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
@@ -217,13 +193,6 @@ extern IOCTLEntry ioctl_entries[];
 #define IOC_W 0x0002
 #define IOC_RW (IOC_R | IOC_W)
 
-void do_init_thread(struct target_pt_regs *regs, struct image_info *infop);
-abi_ulong loader_build_argptr(int envc, int argc, abi_ulong sp,
-  abi_ulong stringp, int push_ptr);
-int loader_exec(int fdexec, const char *filename, char **argv, char **envp,
- struct target_pt_regs *regs, struct image_info *infop,
- struct linux_binprm *);
-
 /*
  * Returns true if the image uses the FDPIC ABI. If this is the case,
  * we have to provide some information (loadmap, pt_dynamic_info) such
@@ -232,12 +201,6 @@ int loader_exec(int fdexec, const char *filename, char 
**argv, char **envp,
  */
 int info_is_fdpic(struct image_info *info);
 
-uint32_t get_elf_eflags(int fd);
-int load_elf_binary(struct linux_binprm *bprm, struct image_info *info);
-int load_flt_binary(struct linux_binprm *bprm, struct image_info *info);
-
-abi_long memcpy_to_target(abi_ulong dest, const void *src,
-  unsigned long len);

[PATCH v2 3/9] linux-user: Split signal-related prototypes into signal-common.h

2021-09-08 Thread Peter Maydell

Split the signal related prototypes into the existing header file
signal-common.h, and include it in those places that now require it.

Signed-off-by: Peter Maydell 
---
v1->v2: use existing signal-common.h instead of new header
---
 linux-user/qemu.h| 36 
 linux-user/signal-common.h   | 36 
 linux-user/aarch64/cpu_loop.c|  1 +
 linux-user/alpha/cpu_loop.c  |  1 +
 linux-user/arm/cpu_loop.c|  1 +
 linux-user/cris/cpu_loop.c   |  1 +
 linux-user/fd-trans.c|  1 +
 linux-user/hexagon/cpu_loop.c|  1 +
 linux-user/hppa/cpu_loop.c   |  1 +
 linux-user/i386/cpu_loop.c   |  1 +
 linux-user/m68k/cpu_loop.c   |  1 +
 linux-user/main.c|  1 +
 linux-user/microblaze/cpu_loop.c |  1 +
 linux-user/mips/cpu_loop.c   |  1 +
 linux-user/nios2/cpu_loop.c  |  1 +
 linux-user/openrisc/cpu_loop.c   |  1 +
 linux-user/ppc/cpu_loop.c|  1 +
 linux-user/riscv/cpu_loop.c  |  1 +
 linux-user/s390x/cpu_loop.c  |  1 +
 linux-user/sh4/cpu_loop.c|  1 +
 linux-user/sparc/cpu_loop.c  |  1 +
 linux-user/syscall.c |  1 +
 linux-user/xtensa/cpu_loop.c |  1 +
 23 files changed, 57 insertions(+), 36 deletions(-)

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index ad2d49fed9f..76d3f5e7eb9 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -415,42 +415,6 @@ extern long safe_syscall_base(int *pending, long number, 
...);
 /* syscall.c */
 int host_to_target_waitstatus(int status);
 
-/* signal.c */
-void process_pending_signals(CPUArchState *cpu_env);
-void signal_init(void);
-int queue_signal(CPUArchState *env, int sig, int si_type,
- target_siginfo_t *info);
-void host_to_target_siginfo(target_siginfo_t *tinfo, const siginfo_t *info);
-void target_to_host_siginfo(siginfo_t *info, const target_siginfo_t *tinfo);
-int target_to_host_signal(int sig);
-int host_to_target_signal(int sig);
-long do_sigreturn(CPUArchState *env);
-long do_rt_sigreturn(CPUArchState *env);
-abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr,
-CPUArchState *env);
-int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset);
-abi_long do_swapcontext(CPUArchState *env, abi_ulong uold_ctx,
-abi_ulong unew_ctx, abi_long ctx_size);
-/**
- * block_signals: block all signals while handling this guest syscall
- *
- * Block all signals, and arrange that the signal mask is returned to
- * its correct value for the guest before we resume execution of guest code.
- * If this function returns non-zero, then the caller should immediately
- * return -TARGET_ERESTARTSYS to the main loop, which will take the pending
- * signal and restart execution of the syscall.
- * If block_signals() returns zero, then the caller can continue with
- * emulation of the system call knowing that no signals can be taken
- * (and therefore that no race conditions will result).
- * This should only be called once, because if it is called a second time
- * it will always return non-zero. (Think of it like a mutex that can't
- * be recursively locked.)
- * Signals will be unblocked again by process_pending_signals().
- *
- * Return value: non-zero if there was a pending signal, zero if not.
- */
-int block_signals(void); /* Returns non zero if signal pending */
-
 #ifdef TARGET_I386
 /* vm86.c */
 void save_v86_state(CPUX86State *env);
diff --git a/linux-user/signal-common.h b/linux-user/signal-common.h
index ea86328b289..58ea23f6ea9 100644
--- a/linux-user/signal-common.h
+++ b/linux-user/signal-common.h
@@ -47,4 +47,40 @@ void setup_frame(int sig, struct target_sigaction *ka,
 void setup_rt_frame(int sig, struct target_sigaction *ka,
 target_siginfo_t *info,
 target_sigset_t *set, CPUArchState *env);
+
+void process_pending_signals(CPUArchState *cpu_env);
+void signal_init(void);
+int queue_signal(CPUArchState *env, int sig, int si_type,
+ target_siginfo_t *info);
+void host_to_target_siginfo(target_siginfo_t *tinfo, const siginfo_t *info);
+void target_to_host_siginfo(siginfo_t *info, const target_siginfo_t *tinfo);
+int target_to_host_signal(int sig);
+int host_to_target_signal(int sig);
+long do_sigreturn(CPUArchState *env);
+long do_rt_sigreturn(CPUArchState *env);
+abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr,
+CPUArchState *env);
+int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset);
+abi_long do_swapcontext(CPUArchState *env, abi_ulong uold_ctx,
+abi_ulong unew_ctx, abi_long ctx_size);
+/**
+ * block_signals: block all signals while handling this guest syscall
+ *
+ * Block all signals, and arrange that the signal mask is returned to
+ * its correct value for the guest before we resume execution of guest code.
+ * If this function returns non-zero, then the caller should

Re: [PATCH v1 4/4] tests: rename virtio_seg_max_adjust to virtio_check_params

2021-09-08 Thread Philippe Mathieu-Daudé

On 1/29/20 3:07 PM, Denis Plotnikov wrote:
> Since, virtio_seg_max_adjust checks not only seg_max, but also
> virtqueue_size parameter, let's make the test more general and
> add new parameters to be checked there in the future.
> 
> Signed-off-by: Denis Plotnikov 
> ---
>  .../{virtio_seg_max_adjust.py => virtio_check_params.py}  | 0
>  1 file changed, 0 insertions(+), 0 deletions(-)
>  rename tests/acceptance/{virtio_seg_max_adjust.py => virtio_check_params.py} 
> (100%)

Old one... reminds me of
https://lore.kernel.org/qemu-devel/20200129212345.20547-1-phi...@redhat.com/
:~(

Reviewed-by: Philippe Mathieu-Daudé

[PATCH v2 7/9] linux-user: Split linux-user internals out of qemu.h

2021-09-08 Thread Peter Maydell

qemu.h is included in various non-linux-user files (which
mostly want the TaskState struct and the functions for
doing usermode access to guest addresses like lock_user(),
unlock_user(), get_user*(), etc).

Split out the parts that are only used in linux-user itself
into a new user-internals.h. This leaves qemu.h with basically
three things:
 * the definition of the TaskState struct
 * the user-access functions and macros
 * do_brk()
all of which are needed by code outside linux-user that
includes qemu.h.

The addition of all the extra #include lines was done with
  sed -i '/include.*qemu\.h/a #include "user-internals.h"' $(git grep -l 
'include.*qemu\.h' linux-user)
(and then undoing the change to fpa11.h).

Signed-off-by: Peter Maydell 
---
 linux-user/qemu.h| 164 +--
 linux-user/user-internals.h  | 184 +++
 linux-user/aarch64/cpu_loop.c|   1 +
 linux-user/aarch64/signal.c  |   1 +
 linux-user/alpha/cpu_loop.c  |   1 +
 linux-user/alpha/signal.c|   1 +
 linux-user/arm/cpu_loop.c|   1 +
 linux-user/arm/signal.c  |   1 +
 linux-user/cris/cpu_loop.c   |   1 +
 linux-user/cris/signal.c |   1 +
 linux-user/elfload.c |   1 +
 linux-user/exit.c|   1 +
 linux-user/fd-trans.c|   1 +
 linux-user/flatload.c|   1 +
 linux-user/hexagon/cpu_loop.c|   1 +
 linux-user/hexagon/signal.c  |   1 +
 linux-user/hppa/cpu_loop.c   |   1 +
 linux-user/hppa/signal.c |   1 +
 linux-user/i386/cpu_loop.c   |   1 +
 linux-user/i386/signal.c |   1 +
 linux-user/linuxload.c   |   1 +
 linux-user/m68k/cpu_loop.c   |   1 +
 linux-user/m68k/signal.c |   1 +
 linux-user/main.c|   1 +
 linux-user/microblaze/cpu_loop.c |   1 +
 linux-user/microblaze/signal.c   |   1 +
 linux-user/mips/cpu_loop.c   |   1 +
 linux-user/mips/signal.c |   1 +
 linux-user/mmap.c|   1 +
 linux-user/nios2/cpu_loop.c  |   1 +
 linux-user/nios2/signal.c|   1 +
 linux-user/openrisc/cpu_loop.c   |   1 +
 linux-user/openrisc/signal.c |   1 +
 linux-user/ppc/cpu_loop.c|   1 +
 linux-user/ppc/signal.c  |   1 +
 linux-user/riscv/cpu_loop.c  |   1 +
 linux-user/riscv/signal.c|   1 +
 linux-user/s390x/cpu_loop.c  |   1 +
 linux-user/s390x/signal.c|   1 +
 linux-user/semihost.c|   1 +
 linux-user/sh4/cpu_loop.c|   1 +
 linux-user/sh4/signal.c  |   1 +
 linux-user/signal.c  |   1 +
 linux-user/sparc/cpu_loop.c  |   1 +
 linux-user/sparc/signal.c|   1 +
 linux-user/strace.c  |   1 +
 linux-user/syscall.c |   1 +
 linux-user/uaccess.c |   1 +
 linux-user/uname.c   |   1 +
 linux-user/vm86.c|   1 +
 linux-user/xtensa/cpu_loop.c |   1 +
 linux-user/xtensa/signal.c   |   1 +
 52 files changed, 235 insertions(+), 163 deletions(-)
 create mode 100644 linux-user/user-internals.h

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index a82a46236e6..92290a55c0d 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -1,7 +1,6 @@
 #ifndef QEMU_H
 #define QEMU_H
 
-#include "hostdep.h"
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
@@ -10,7 +9,6 @@
 
 #include "exec/user/abitypes.h"
 
-#include "exec/user/thunk.h"
 #include "syscall_defs.h"
 #include "target_syscall.h"
 #include "exec/gdbstub.h"
@@ -166,93 +164,9 @@ typedef struct TaskState {
 struct target_sigaltstack sigaltstack_used;
 } __attribute__((aligned(16))) TaskState;
 
-extern char *exec_path;
-void init_task_state(TaskState *ts);
-void task_settid(TaskState *);
-void stop_all_tasks(void);
-extern const char *qemu_uname_release;
-extern unsigned long mmap_min_addr;
-
-typedef struct IOCTLEntry IOCTLEntry;
-
-typedef abi_long do_ioctl_fn(const IOCTLEntry *ie, uint8_t *buf_temp,
- int fd, int cmd, abi_long arg);
-
-struct IOCTLEntry {
-int target_cmd;
-unsigned int host_cmd;
-const char *name;
-int access;
-do_ioctl_fn *do_ioctl;
-const argtype arg_type[5];
-};
-
-extern IOCTLEntry ioctl_entries[];
-
-#define IOC_R 0x0001
-#define IOC_W 0x0002
-#define IOC_RW (IOC_R | IOC_W)
-
-/*
- * Returns true if the image uses the FDPIC ABI. If this is the case,
- * we have to provide some information (loadmap, pt_dynamic_info) such
- * that the program can be relocated adequately. This is also useful
- * when handling signals.
- */
-int info_is_fdpic(struct image_info *info);
-
-void target_set_brk(abi_ulong new_brk);
-abi_long do_brk(abi_ulong new_brk);
-void syscall_init(void);
-abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
-abi_long arg2, abi_long arg3, abi_long arg4,
-abi_long arg5, abi_long arg6, abi_long arg7,
-abi_long arg8);
-extern

[PATCH v2 0/9] linux-user: split internals out of qemu.h

2021-09-08 Thread Peter Maydell

linux-user/qemu.h is an awkward header, for two reasons:
 (1) its name suggests it's a rather common generic header,
 but it's actually specific to the usermode emulators
 (2) it is a mix of:
   * lots of things internal to the implementation of linux-user
   * functions that a few files outside linux-user want
 (mostly the user-access functions like lock_user,
 get/put_user_*, etc, and also the TaskStruct definition)

This patchset tries to clean it up a bit by at least splitting
most of the "just internal to linux-user" parts out of qemu.h
and putting them in a handful of different .h files that are
then included by the linux-user files that need them.

I think the ideal would probably be to eventually junk
qemu.h entirely and have a few separate headers specifically
for the bits that non-linux-user code needs (eg a 'user-access.h'
for the get/put_user stuff), perhaps located somewhere that
means we don't need to put linux-user/ on the include path.
But that's awkward as it needs interaction with bsd-user too.
So this much cleanup seemed like a reasonable start...

Changes v1->v2:
 * rebased
 * fixed a few minor niggles spotted in v1 during review
 * use existing signal-common.h rather than creating a new
   header for signal-related functions

Patches still needing review: 3, 4, 5, 7

thanks
-- PMM

Peter Maydell (9):
  linux-user: Fix coding style nits in qemu.h
  linux-user: Split strace prototypes into strace.h
  linux-user: Split signal-related prototypes into signal-common.h
  linux-user: Split loader-related prototypes into loader.h
  linux-user: Split mmap prototypes into user-mmap.h
  linux-user: Split safe-syscall macro into its own header
  linux-user: Split linux-user internals out of qemu.h
  linux-user: Don't include gdbstub.h in qemu.h
  linux-user: Drop unneeded includes from qemu.h

 linux-user/loader.h  |  59 +
 linux-user/qemu.h| 429 ++-
 linux-user/safe-syscall.h| 154 +++
 linux-user/signal-common.h   |  36 +++
 linux-user/strace.h  |  38 +++
 linux-user/user-internals.h  | 186 ++
 linux-user/user-mmap.h   |  34 +++
 gdbstub.c|   2 +-
 linux-user/aarch64/cpu_loop.c|   2 +
 linux-user/aarch64/signal.c  |   1 +
 linux-user/alpha/cpu_loop.c  |   2 +
 linux-user/alpha/signal.c|   1 +
 linux-user/arm/cpu_loop.c|   2 +
 linux-user/arm/signal.c  |   1 +
 linux-user/cris/cpu_loop.c   |   2 +
 linux-user/cris/signal.c |   1 +
 linux-user/elfload.c |   3 +
 linux-user/exit.c|   2 +
 linux-user/fd-trans.c|   2 +
 linux-user/flatload.c|   3 +
 linux-user/hexagon/cpu_loop.c|   2 +
 linux-user/hexagon/signal.c  |   1 +
 linux-user/hppa/cpu_loop.c   |   2 +
 linux-user/hppa/signal.c |   1 +
 linux-user/i386/cpu_loop.c   |   3 +
 linux-user/i386/signal.c |   1 +
 linux-user/linuxload.c   |   2 +
 linux-user/m68k/cpu_loop.c   |   2 +
 linux-user/m68k/signal.c |   1 +
 linux-user/main.c|   5 +
 linux-user/microblaze/cpu_loop.c |   2 +
 linux-user/microblaze/signal.c   |   1 +
 linux-user/mips/cpu_loop.c   |   2 +
 linux-user/mips/signal.c |   1 +
 linux-user/mmap.c|   2 +
 linux-user/nios2/cpu_loop.c  |   2 +
 linux-user/nios2/signal.c|   1 +
 linux-user/openrisc/cpu_loop.c   |   2 +
 linux-user/openrisc/signal.c |   1 +
 linux-user/ppc/cpu_loop.c|   2 +
 linux-user/ppc/signal.c  |   1 +
 linux-user/riscv/cpu_loop.c  |   2 +
 linux-user/riscv/signal.c|   1 +
 linux-user/s390x/cpu_loop.c  |   2 +
 linux-user/s390x/signal.c|   1 +
 linux-user/semihost.c|   1 +
 linux-user/sh4/cpu_loop.c|   2 +
 linux-user/sh4/signal.c  |   1 +
 linux-user/signal.c  |   5 +
 linux-user/sparc/cpu_loop.c  |   2 +
 linux-user/sparc/signal.c|   1 +
 linux-user/strace.c  |   3 +
 linux-user/syscall.c |   6 +
 linux-user/uaccess.c |   1 +
 linux-user/uname.c   |   1 +
 linux-user/vm86.c|   1 +
 linux-user/xtensa/cpu_loop.c |   2 +
 linux-user/xtensa/signal.c   |   1 +
 semihosting/arm-compat-semi.c|   2 +-
 target/m68k/m68k-semi.c  |   2 +-
 target/nios2/nios2-semi.c|   2 +-
 thunk.c  |   1 +
 62 files changed, 620 insertions(+), 417 deletions(-)
 create mode 100644 linux-user/loader.h
 create mode 100644 linux-user/safe-syscall.h
 create mode 100644 linux-user/strace.h
 create mode 100644 linux-user/user-internals.h
 create mode 100644 linux-user/user-mmap.h

-- 
2.20.1

[PATCH v2 6/9] linux-user: Split safe-syscall macro into its own header

2021-09-08 Thread Peter Maydell

Split the safe-syscall macro from qemu.h into a new safe-syscall.h.

Signed-off-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
---
 linux-user/qemu.h | 135 -
 linux-user/safe-syscall.h | 154 ++
 linux-user/syscall.c  |   1 +
 3 files changed, 155 insertions(+), 135 deletions(-)
 create mode 100644 linux-user/safe-syscall.h

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 0cb79990579..a82a46236e6 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -240,141 +240,6 @@ void probe_guest_base(const char *image_name,
 
 #include "qemu/log.h"
 
-/* safe_syscall.S */
-
-/**
- * safe_syscall:
- * @int number: number of system call to make
- * ...: arguments to the system call
- *
- * Call a system call if guest signal not pending.
- * This has the same API as the libc syscall() function, except that it
- * may return -1 with errno == TARGET_ERESTARTSYS if a signal was pending.
- *
- * Returns: the system call result, or -1 with an error code in errno
- * (Errnos are host errnos; we rely on TARGET_ERESTARTSYS not clashing
- * with any of the host errno values.)
- */
-
-/*
- * A guide to using safe_syscall() to handle interactions between guest
- * syscalls and guest signals:
- *
- * Guest syscalls come in two flavours:
- *
- * (1) Non-interruptible syscalls
- *
- * These are guest syscalls that never get interrupted by signals and
- * so never return EINTR. They can be implemented straightforwardly in
- * QEMU: just make sure that if the implementation code has to make any
- * blocking calls that those calls are retried if they return EINTR.
- * It's also OK to implement these with safe_syscall, though it will be
- * a little less efficient if a signal is delivered at the 'wrong' moment.
- *
- * Some non-interruptible syscalls need to be handled using block_signals()
- * to block signals for the duration of the syscall. This mainly applies
- * to code which needs to modify the data structures used by the
- * host_signal_handler() function and the functions it calls, including
- * all syscalls which change the thread's signal mask.
- *
- * (2) Interruptible syscalls
- *
- * These are guest syscalls that can be interrupted by signals and
- * for which we need to either return EINTR or arrange for the guest
- * syscall to be restarted. This category includes both syscalls which
- * always restart (and in the kernel return -ERESTARTNOINTR), ones
- * which only restart if there is no handler (kernel returns -ERESTARTNOHAND
- * or -ERESTART_RESTARTBLOCK), and the most common kind which restart
- * if the handler was registered with SA_RESTART (kernel returns
- * -ERESTARTSYS). System calls which are only interruptible in some
- * situations (like 'open') also need to be handled this way.
- *
- * Here it is important that the host syscall is made
- * via this safe_syscall() function, and *not* via the host libc.
- * If the host libc is used then the implementation will appear to work
- * most of the time, but there will be a race condition where a
- * signal could arrive just before we make the host syscall inside libc,
- * and then then guest syscall will not correctly be interrupted.
- * Instead the implementation of the guest syscall can use the safe_syscall
- * function but otherwise just return the result or errno in the usual
- * way; the main loop code will take care of restarting the syscall
- * if appropriate.
- *
- * (If the implementation needs to make multiple host syscalls this is
- * OK; any which might really block must be via safe_syscall(); for those
- * which are only technically blocking (ie which we know in practice won't
- * stay in the host kernel indefinitely) it's OK to use libc if necessary.
- * You must be able to cope with backing out correctly if some safe_syscall
- * you make in the implementation returns either -TARGET_ERESTARTSYS or
- * EINTR though.)
- *
- * block_signals() cannot be used for interruptible syscalls.
- *
- *
- * How and why the safe_syscall implementation works:
- *
- * The basic setup is that we make the host syscall via a known
- * section of host native assembly. If a signal occurs, our signal
- * handler checks the interrupted host PC against the addresse of that
- * known section. If the PC is before or at the address of the syscall
- * instruction then we change the PC to point at a "return
- * -TARGET_ERESTARTSYS" code path instead, and then exit the signal handler
- * (causing the safe_syscall() call to immediately return that value).
- * Then in the main.c loop if we see this magic return value we adjust
- * the guest PC to wind it back to before the system call, and invoke
- * the guest signal handler as usual.
- *
- * This winding-back will happen in two cases:
- * (1) signal came in just before we took the host syscall (a race);
- *   in this case we'll take the guest signal and have another go
- *   at the syscall afterwards, and this is indistinguishable for

[PULL 05/12] mac_via: move ADB variables to MOS6522Q800VIA1State

2021-09-08 Thread Laurent Vivier

From: Mark Cave-Ayland 

The ADB is accessed using clock and data pins on q800 VIA1 port B and so can be
moved to MOS6522Q800VIA1State.

Signed-off-by: Mark Cave-Ayland 
Reviewed-by: Laurent Vivier 
Reviewed-by: Philippe Mathieu-Daudé 
Message-Id: <20210830102447.10806-6-mark.cave-ayl...@ilande.co.uk>
Signed-off-by: Laurent Vivier 
---
 include/hw/misc/mac_via.h |  20 ++---
 hw/m68k/q800.c|   6 +-
 hw/misc/mac_via.c | 169 +++---
 3 files changed, 96 insertions(+), 99 deletions(-)

diff --git a/include/hw/misc/mac_via.h b/include/hw/misc/mac_via.h
index b2c499ed6b5b..182dcb742235 100644
--- a/include/hw/misc/mac_via.h
+++ b/include/hw/misc/mac_via.h
@@ -56,6 +56,16 @@ struct MOS6522Q800VIA1State {
 int wprotect;
 int alt;
 
+/* ADB */
+ADBBusState adb_bus;
+qemu_irq adb_data_ready;
+int adb_data_in_size;
+int adb_data_in_index;
+int adb_data_out_index;
+uint8_t adb_data_in[128];
+uint8_t adb_data_out[16];
+uint8_t adb_autopoll_cmd;
+
 /* external timers */
 QEMUTimer *one_second_timer;
 int64_t next_second;
@@ -102,16 +112,6 @@ struct MacVIAState {
 /* VIAs */
 MOS6522Q800VIA1State mos6522_via1;
 MOS6522Q800VIA2State mos6522_via2;
-
-/* ADB */
-ADBBusState adb_bus;
-qemu_irq adb_data_ready;
-int adb_data_in_size;
-int adb_data_in_index;
-int adb_data_out_index;
-uint8_t adb_data_in[128];
-uint8_t adb_data_out[16];
-uint8_t adb_autopoll_cmd;
 };
 
 #endif
diff --git a/hw/m68k/q800.c b/hw/m68k/q800.c
index ac0a13060b4a..e14f68f19b13 100644
--- a/hw/m68k/q800.c
+++ b/hw/m68k/q800.c
@@ -225,7 +225,7 @@ static void q800_init(MachineState *machine)
 hwaddr parameters_base;
 CPUState *cs;
 DeviceState *dev;
-DeviceState *via_dev;
+DeviceState *via_dev, *via1_dev;
 DeviceState *escc_orgate;
 SysBusESPState *sysbus_esp;
 ESPState *esp;
@@ -285,8 +285,8 @@ static void q800_init(MachineState *machine)
 qdev_connect_gpio_out_named(DEVICE(sysbus), "irq", 1,
 qdev_get_gpio_in(glue, 1));
 
-
-adb_bus = qdev_get_child_bus(via_dev, "adb.0");
+via1_dev = DEVICE(MOS6522_Q800_VIA1(_VIA(via_dev)->mos6522_via1));
+adb_bus = qdev_get_child_bus(DEVICE(via1_dev), "adb.0");
 dev = qdev_new(TYPE_ADB_KEYBOARD);
 qdev_realize_and_unref(dev, adb_bus, _fatal);
 dev = qdev_new(TYPE_ADB_MOUSE);
diff --git a/hw/misc/mac_via.c b/hw/misc/mac_via.c
index 3978e6b44136..b4a65480fdc8 100644
--- a/hw/misc/mac_via.c
+++ b/hw/misc/mac_via.c
@@ -595,10 +595,9 @@ static void via1_rtc_update(MOS6522Q800VIA1State *v1s)
 
 static void adb_via_poll(void *opaque)
 {
-MacVIAState *m = opaque;
-MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(>mos6522_via1);
+MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(opaque);
 MOS6522State *s = MOS6522(v1s);
-ADBBusState *adb_bus = >adb_bus;
+ADBBusState *adb_bus = >adb_bus;
 uint8_t obuf[9];
 uint8_t *data = >sr;
 int olen;
@@ -610,50 +609,50 @@ static void adb_via_poll(void *opaque)
  */
 adb_autopoll_block(adb_bus);
 
-if (m->adb_data_in_size > 0 && m->adb_data_in_index == 0) {
+if (v1s->adb_data_in_size > 0 && v1s->adb_data_in_index == 0) {
 /*
  * For older Linux kernels that switch to IDLE mode after sending the
  * ADB command, detect if there is an existing response and return that
  * as a a "fake" autopoll reply or bus timeout accordingly
  */
-*data = m->adb_data_out[0];
-olen = m->adb_data_in_size;
+*data = v1s->adb_data_out[0];
+olen = v1s->adb_data_in_size;
 
 s->b &= ~VIA1B_vADBInt;
-qemu_irq_raise(m->adb_data_ready);
+qemu_irq_raise(v1s->adb_data_ready);
 } else {
 /*
  * Otherwise poll as normal
  */
-m->adb_data_in_index = 0;
-m->adb_data_out_index = 0;
+v1s->adb_data_in_index = 0;
+v1s->adb_data_out_index = 0;
 olen = adb_poll(adb_bus, obuf, adb_bus->autopoll_mask);
 
 if (olen > 0) {
 /* Autopoll response */
 *data = obuf[0];
 olen--;
-memcpy(m->adb_data_in, [1], olen);
-m->adb_data_in_size = olen;
+memcpy(v1s->adb_data_in, [1], olen);
+v1s->adb_data_in_size = olen;
 
 s->b &= ~VIA1B_vADBInt;
-qemu_irq_raise(m->adb_data_ready);
+qemu_irq_raise(v1s->adb_data_ready);
 } else {
-*data = m->adb_autopoll_cmd;
+*data = v1s->adb_autopoll_cmd;
 obuf[0] = 0xff;
 obuf[1] = 0xff;
 olen = 2;
 
-memcpy(m->adb_data_in, obuf, olen);
-m->adb_data_in_size = olen;
+memcpy(v1s->adb_data_in, obuf, olen);
+v1s->adb_data_in_size = olen;
 
 s->b &= ~VIA1B_vADBInt;
-qemu_irq_raise(m->adb_data_ready);
+

[PATCH v2 5/9] linux-user: Split mmap prototypes into user-mmap.h

2021-09-08 Thread Peter Maydell

Split out the mmap prototypes into a new header user-mmap.h
which we only include where required.

Signed-off-by: Peter Maydell 
---
 linux-user/qemu.h  | 14 --
 linux-user/user-mmap.h | 34 ++
 linux-user/elfload.c   |  1 +
 linux-user/flatload.c  |  1 +
 linux-user/i386/cpu_loop.c |  1 +
 linux-user/main.c  |  1 +
 linux-user/mmap.c  |  1 +
 linux-user/syscall.c   |  1 +
 8 files changed, 40 insertions(+), 14 deletions(-)
 create mode 100644 linux-user/user-mmap.h

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 02c4778c970..0cb79990579 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -389,20 +389,6 @@ void sparc64_set_context(CPUSPARCState *env);
 void sparc64_get_context(CPUSPARCState *env);
 #endif
 
-/* mmap.c */
-int target_mprotect(abi_ulong start, abi_ulong len, int prot);
-abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
- int flags, int fd, abi_ulong offset);
-int target_munmap(abi_ulong start, abi_ulong len);
-abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
-   abi_ulong new_size, unsigned long flags,
-   abi_ulong new_addr);
-extern unsigned long last_brk;
-extern abi_ulong mmap_next_start;
-abi_ulong mmap_find_vma(abi_ulong, abi_ulong, abi_ulong);
-void mmap_fork_start(void);
-void mmap_fork_end(int child);
-
 /* user access */
 
 #define VERIFY_READ  PAGE_READ
diff --git a/linux-user/user-mmap.h b/linux-user/user-mmap.h
new file mode 100644
index 000..d1dec99c024
--- /dev/null
+++ b/linux-user/user-mmap.h
@@ -0,0 +1,34 @@
+/*
+ * user-mmap.h: prototypes for linux-user guest binary loader
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see .
+ */
+
+#ifndef LINUX_USER_USER_MMAP_H
+#define LINUX_USER_USER_MMAP_H
+
+int target_mprotect(abi_ulong start, abi_ulong len, int prot);
+abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
+ int flags, int fd, abi_ulong offset);
+int target_munmap(abi_ulong start, abi_ulong len);
+abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
+   abi_ulong new_size, unsigned long flags,
+   abi_ulong new_addr);
+extern unsigned long last_brk;
+extern abi_ulong mmap_next_start;
+abi_ulong mmap_find_vma(abi_ulong, abi_ulong, abi_ulong);
+void mmap_fork_start(void);
+void mmap_fork_end(int child);
+
+#endif /* LINUX_USER_USER_MMAP_H */
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 6244fcd05ce..c291f3cee09 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -7,6 +7,7 @@
 
 #include "qemu.h"
 #include "loader.h"
+#include "user-mmap.h"
 #include "disas/disas.h"
 #include "qemu/bitops.h"
 #include "qemu/path.h"
diff --git a/linux-user/flatload.c b/linux-user/flatload.c
index 7484a4a3543..99550061db8 100644
--- a/linux-user/flatload.c
+++ b/linux-user/flatload.c
@@ -37,6 +37,7 @@
 
 #include "qemu.h"
 #include "loader.h"
+#include "user-mmap.h"
 #include "flat.h"
 #include "target_flat.h"
 
diff --git a/linux-user/i386/cpu_loop.c b/linux-user/i386/cpu_loop.c
index ee2e139a063..fcc410a426a 100644
--- a/linux-user/i386/cpu_loop.c
+++ b/linux-user/i386/cpu_loop.c
@@ -22,6 +22,7 @@
 #include "qemu.h"
 #include "cpu_loop-common.h"
 #include "signal-common.h"
+#include "user-mmap.h"
 
 /***/
 /* CPUX86 core interface */
diff --git a/linux-user/main.c b/linux-user/main.c
index 67c5a87ffad..a76aec73368 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -51,6 +51,7 @@
 #include "fd-trans.h"
 #include "signal-common.h"
 #include "loader.h"
+#include "user-mmap.h"
 
 #ifndef AT_FLAGS_PRESERVE_ARGV0
 #define AT_FLAGS_PRESERVE_ARGV0_BIT 0
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index 0e103859fed..4b182444bbd 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -20,6 +20,7 @@
 #include "trace.h"
 #include "exec/log.h"
 #include "qemu.h"
+#include "user-mmap.h"
 
 static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
 static __thread int mmap_lock_count;
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index b1cd7410d8b..b6c8406e1dc 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -130,6 +130,7 @@
 #include "strace.h"
 #include "signal-common.h"
 #include "loader.h"

[PATCH v2 8/9] linux-user: Don't include gdbstub.h in qemu.h

2021-09-08 Thread Peter Maydell

Currently the linux-user qemu.h pulls in gdbstub.h. There's no real reason
why it should do this; include it directly from the C files which require
it, and drop the include line in qemu.h.

(Note that several of the C files previously relying on this indirect
include were going out of their way to only include gdbstub.h conditionally
on not CONFIG_USER_ONLY!)

Signed-off-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
---
 linux-user/qemu.h | 1 -
 gdbstub.c | 2 +-
 linux-user/exit.c | 1 +
 linux-user/main.c | 1 +
 linux-user/signal.c   | 2 ++
 semihosting/arm-compat-semi.c | 2 +-
 target/m68k/m68k-semi.c   | 2 +-
 target/nios2/nios2-semi.c | 2 +-
 8 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 92290a55c0d..fda90fc28d6 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -11,7 +11,6 @@
 
 #include "syscall_defs.h"
 #include "target_syscall.h"
-#include "exec/gdbstub.h"
 
 /*
  * This is the size of the host kernel's sigset_t, needed where we make
diff --git a/gdbstub.c b/gdbstub.c
index 5d8e6ae3cd9..36b85aa50e2 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -31,13 +31,13 @@
 #include "qemu/cutils.h"
 #include "qemu/module.h"
 #include "trace/trace-root.h"
+#include "exec/gdbstub.h"
 #ifdef CONFIG_USER_ONLY
 #include "qemu.h"
 #else
 #include "monitor/monitor.h"
 #include "chardev/char.h"
 #include "chardev/char-fe.h"
-#include "exec/gdbstub.h"
 #include "hw/cpu/cluster.h"
 #include "hw/boards.h"
 #endif
diff --git a/linux-user/exit.c b/linux-user/exit.c
index 68a3a6f9df0..fa6ef0b9b44 100644
--- a/linux-user/exit.c
+++ b/linux-user/exit.c
@@ -17,6 +17,7 @@
  *  along with this program; if not, see .
  */
 #include "qemu/osdep.h"
+#include "exec/gdbstub.h"
 #include "qemu.h"
 #include "user-internals.h"
 #ifdef CONFIG_GPROF
diff --git a/linux-user/main.c b/linux-user/main.c
index 9edc0b22207..5ce17e423db 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -40,6 +40,7 @@
 #include "qemu/module.h"
 #include "qemu/plugin.h"
 #include "exec/exec-all.h"
+#include "exec/gdbstub.h"
 #include "tcg/tcg.h"
 #include "qemu/timer.h"
 #include "qemu/envlist.h"
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 6af66123d0f..f8346f5ec5f 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -18,6 +18,8 @@
  */
 #include "qemu/osdep.h"
 #include "qemu/bitops.h"
+#include "exec/gdbstub.h"
+
 #include 
 #include 
 
diff --git a/semihosting/arm-compat-semi.c b/semihosting/arm-compat-semi.c
index 1c29146dcfa..01badea99c8 100644
--- a/semihosting/arm-compat-semi.c
+++ b/semihosting/arm-compat-semi.c
@@ -37,12 +37,12 @@
 #include "semihosting/console.h"
 #include "semihosting/common-semi.h"
 #include "qemu/timer.h"
+#include "exec/gdbstub.h"
 #ifdef CONFIG_USER_ONLY
 #include "qemu.h"
 
 #define COMMON_SEMI_HEAP_SIZE (128 * 1024 * 1024)
 #else
-#include "exec/gdbstub.h"
 #include "qemu/cutils.h"
 #ifdef TARGET_ARM
 #include "hw/arm/boot.h"
diff --git a/target/m68k/m68k-semi.c b/target/m68k/m68k-semi.c
index d919245e4f8..44ec7e4612c 100644
--- a/target/m68k/m68k-semi.c
+++ b/target/m68k/m68k-semi.c
@@ -20,11 +20,11 @@
 #include "qemu/osdep.h"
 
 #include "cpu.h"
+#include "exec/gdbstub.h"
 #if defined(CONFIG_USER_ONLY)
 #include "qemu.h"
 #define SEMIHOSTING_HEAP_SIZE (128 * 1024 * 1024)
 #else
-#include "exec/gdbstub.h"
 #include "exec/softmmu-semi.h"
 #include "hw/boards.h"
 #endif
diff --git a/target/nios2/nios2-semi.c b/target/nios2/nios2-semi.c
index e508b2fafce..fe5598bae4d 100644
--- a/target/nios2/nios2-semi.c
+++ b/target/nios2/nios2-semi.c
@@ -24,11 +24,11 @@
 #include "qemu/osdep.h"
 
 #include "cpu.h"
+#include "exec/gdbstub.h"
 #if defined(CONFIG_USER_ONLY)
 #include "qemu.h"
 #else
 #include "qemu-common.h"
-#include "exec/gdbstub.h"
 #include "exec/softmmu-semi.h"
 #endif
 #include "qemu/log.h"
-- 
2.20.1

[PATCH v2 2/9] linux-user: Split strace prototypes into strace.h

2021-09-08 Thread Peter Maydell

The functions implemented in strace.c are only used in a few files in
linux-user; split them out of qemu.h and into a new strace.h header
which we include in the places that need it.

Signed-off-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
---
 linux-user/qemu.h| 18 --
 linux-user/strace.h  | 38 ++
 linux-user/signal.c  |  1 +
 linux-user/strace.c  |  2 ++
 linux-user/syscall.c |  1 +
 5 files changed, 42 insertions(+), 18 deletions(-)
 create mode 100644 linux-user/strace.h

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 34b975ba502..ad2d49fed9f 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -415,24 +415,6 @@ extern long safe_syscall_base(int *pending, long number, 
...);
 /* syscall.c */
 int host_to_target_waitstatus(int status);
 
-/* strace.c */
-void print_syscall(void *cpu_env, int num,
-   abi_long arg1, abi_long arg2, abi_long arg3,
-   abi_long arg4, abi_long arg5, abi_long arg6);
-void print_syscall_ret(void *cpu_env, int num, abi_long ret,
-   abi_long arg1, abi_long arg2, abi_long arg3,
-   abi_long arg4, abi_long arg5, abi_long arg6);
-/**
- * print_taken_signal:
- * @target_signum: target signal being taken
- * @tinfo: target_siginfo_t which will be passed to the guest for the signal
- *
- * Print strace output indicating that this signal is being taken by the guest,
- * in a format similar to:
- * --- SIGSEGV {si_signo=SIGSEGV, si_code=SI_KERNEL, si_addr=0} ---
- */
-void print_taken_signal(int target_signum, const target_siginfo_t *tinfo);
-
 /* signal.c */
 void process_pending_signals(CPUArchState *cpu_env);
 void signal_init(void);
diff --git a/linux-user/strace.h b/linux-user/strace.h
new file mode 100644
index 000..1e232d07fc8
--- /dev/null
+++ b/linux-user/strace.h
@@ -0,0 +1,38 @@
+/*
+ * strace.h: prototypes for linux-user builtin strace handling
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see .
+ */
+
+#ifndef LINUX_USER_STRACE_H
+#define LINUX_USER_STRACE_H
+
+void print_syscall(void *cpu_env, int num,
+   abi_long arg1, abi_long arg2, abi_long arg3,
+   abi_long arg4, abi_long arg5, abi_long arg6);
+void print_syscall_ret(void *cpu_env, int num, abi_long ret,
+   abi_long arg1, abi_long arg2, abi_long arg3,
+   abi_long arg4, abi_long arg5, abi_long arg6);
+/**
+ * print_taken_signal:
+ * @target_signum: target signal being taken
+ * @tinfo: target_siginfo_t which will be passed to the guest for the signal
+ *
+ * Print strace output indicating that this signal is being taken by the guest,
+ * in a format similar to:
+ * --- SIGSEGV {si_signo=SIGSEGV, si_code=SI_KERNEL, si_addr=0} ---
+ */
+void print_taken_signal(int target_signum, const target_siginfo_t *tinfo);
+
+#endif /* LINUX_USER_STRACE_H */
diff --git a/linux-user/signal.c b/linux-user/signal.c
index a8faea6f090..ee1934947ac 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -22,6 +22,7 @@
 #include 
 
 #include "qemu.h"
+#include "strace.h"
 #include "trace.h"
 #include "signal-common.h"
 
diff --git a/linux-user/strace.c b/linux-user/strace.c
index cce0a5d1e35..ee3429fae82 100644
--- a/linux-user/strace.c
+++ b/linux-user/strace.c
@@ -1,4 +1,5 @@
 #include "qemu/osdep.h"
+
 #include 
 #include 
 #include 
@@ -14,6 +15,7 @@
 #include 
 #include 
 #include "qemu.h"
+#include "strace.h"
 
 struct syscallname {
 int nr;
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index ccd3892b2df..4ac2801e495 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -127,6 +127,7 @@
 #include "uname.h"
 
 #include "qemu.h"
+#include "strace.h"
 #include "qemu/guest-random.h"
 #include "qemu/selfmap.h"
 #include "user/syscall-trace.h"
-- 
2.20.1

[PULL 04/12] mac_via: move PRAM/RTC variables to MOS6522Q800VIA1State

2021-09-08 Thread Laurent Vivier

From: Mark Cave-Ayland 

The PRAM/RTC is accessed using clock and data pins on q800 VIA1 port B and so
can be moved to MOS6522Q800VIA1State.

Signed-off-by: Mark Cave-Ayland 
Reviewed-by: Laurent Vivier 
Reviewed-by: Philippe Mathieu-Daudé 
Message-Id: <20210830102447.10806-5-mark.cave-ayl...@ilande.co.uk>
Signed-off-by: Laurent Vivier 
---
 include/hw/misc/mac_via.h |  21 +++---
 hw/misc/mac_via.c | 135 +++---
 2 files changed, 77 insertions(+), 79 deletions(-)

diff --git a/include/hw/misc/mac_via.h b/include/hw/misc/mac_via.h
index bd1c65d1b9c6..b2c499ed6b5b 100644
--- a/include/hw/misc/mac_via.h
+++ b/include/hw/misc/mac_via.h
@@ -46,6 +46,16 @@ struct MOS6522Q800VIA1State {
 BlockBackend *blk;
 VMChangeStateEntry *vmstate;
 
+uint32_t tick_offset;
+
+uint8_t data_out;
+int data_out_cnt;
+uint8_t data_in;
+uint8_t data_in_cnt;
+uint8_t cmd;
+int wprotect;
+int alt;
+
 /* external timers */
 QEMUTimer *one_second_timer;
 int64_t next_second;
@@ -93,17 +103,6 @@ struct MacVIAState {
 MOS6522Q800VIA1State mos6522_via1;
 MOS6522Q800VIA2State mos6522_via2;
 
-/* RTC */
-uint32_t tick_offset;
-
-uint8_t data_out;
-int data_out_cnt;
-uint8_t data_in;
-uint8_t data_in_cnt;
-uint8_t cmd;
-int wprotect;
-int alt;
-
 /* ADB */
 ADBBusState adb_bus;
 qemu_irq adb_data_ready;
diff --git a/hw/misc/mac_via.c b/hw/misc/mac_via.c
index abbe99af11f6..3978e6b44136 100644
--- a/hw/misc/mac_via.c
+++ b/hw/misc/mac_via.c
@@ -433,9 +433,8 @@ static int via1_rtc_compact_cmd(uint8_t value)
 return REG_INVALID;
 }
 
-static void via1_rtc_update(MacVIAState *m)
+static void via1_rtc_update(MOS6522Q800VIA1State *v1s)
 {
-MOS6522Q800VIA1State *v1s = >mos6522_via1;
 MOS6522State *s = MOS6522(v1s);
 int cmd, sector, addr;
 uint32_t time;
@@ -447,40 +446,40 @@ static void via1_rtc_update(MacVIAState *m)
 if (s->dirb & VIA1B_vRTCData) {
 /* send bits to the RTC */
 if (!(v1s->last_b & VIA1B_vRTCClk) && (s->b & VIA1B_vRTCClk)) {
-m->data_out <<= 1;
-m->data_out |= s->b & VIA1B_vRTCData;
-m->data_out_cnt++;
+v1s->data_out <<= 1;
+v1s->data_out |= s->b & VIA1B_vRTCData;
+v1s->data_out_cnt++;
 }
-trace_via1_rtc_update_data_out(m->data_out_cnt, m->data_out);
+trace_via1_rtc_update_data_out(v1s->data_out_cnt, v1s->data_out);
 } else {
-trace_via1_rtc_update_data_in(m->data_in_cnt, m->data_in);
+trace_via1_rtc_update_data_in(v1s->data_in_cnt, v1s->data_in);
 /* receive bits from the RTC */
 if ((v1s->last_b & VIA1B_vRTCClk) &&
 !(s->b & VIA1B_vRTCClk) &&
-m->data_in_cnt) {
+v1s->data_in_cnt) {
 s->b = (s->b & ~VIA1B_vRTCData) |
-   ((m->data_in >> 7) & VIA1B_vRTCData);
-m->data_in <<= 1;
-m->data_in_cnt--;
+   ((v1s->data_in >> 7) & VIA1B_vRTCData);
+v1s->data_in <<= 1;
+v1s->data_in_cnt--;
 }
 return;
 }
 
-if (m->data_out_cnt != 8) {
+if (v1s->data_out_cnt != 8) {
 return;
 }
 
-m->data_out_cnt = 0;
+v1s->data_out_cnt = 0;
 
-trace_via1_rtc_internal_status(m->cmd, m->alt, m->data_out);
+trace_via1_rtc_internal_status(v1s->cmd, v1s->alt, v1s->data_out);
 /* first byte: it's a command */
-if (m->cmd == REG_EMPTY) {
+if (v1s->cmd == REG_EMPTY) {
 
-cmd = via1_rtc_compact_cmd(m->data_out);
+cmd = via1_rtc_compact_cmd(v1s->data_out);
 trace_via1_rtc_internal_cmd(cmd);
 
 if (cmd == REG_INVALID) {
-trace_via1_rtc_cmd_invalid(m->data_out);
+trace_via1_rtc_cmd_invalid(v1s->data_out);
 return;
 }
 
@@ -492,20 +491,20 @@ static void via1_rtc_update(MacVIAState *m)
  * register 3 is highest-order byte
  */
 
-time = m->tick_offset + (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)
+time = v1s->tick_offset + 
(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)
/ NANOSECONDS_PER_SECOND);
 trace_via1_rtc_internal_time(time);
-m->data_in = (time >> ((cmd & 0x03) << 3)) & 0xff;
-m->data_in_cnt = 8;
+v1s->data_in = (time >> ((cmd & 0x03) << 3)) & 0xff;
+v1s->data_in_cnt = 8;
 trace_via1_rtc_cmd_seconds_read((cmd & 0x7f) - REG_0,
-m->data_in);
+v1s->data_in);
 break;
 case REG_PRAM_ADDR...REG_PRAM_ADDR_LAST:
 /* PRAM address 0x00 -> 0x13 */
-m->data_in = v1s->PRAM[(cmd & 0x7f) - REG_PRAM_ADDR];
-m->data_in_cnt = 8;
+v1s->data_in =

[PATCH v2 1/9] linux-user: Fix coding style nits in qemu.h

2021-09-08 Thread Peter Maydell

We're about to move a lot of the code in qemu.h out into different
header files; fix the coding style nits first so that checkpatch
is happy with the pure code-movement patches. This is mostly
block-comment style but also a few whitespace issues.

Signed-off-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
---
 linux-user/qemu.h | 47 ++-
 1 file changed, 30 insertions(+), 17 deletions(-)

diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 3b0b6b75fe8..34b975ba502 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -15,12 +15,14 @@
 #include "target_syscall.h"
 #include "exec/gdbstub.h"
 
-/* This is the size of the host kernel's sigset_t, needed where we make
+/*
+ * This is the size of the host kernel's sigset_t, needed where we make
  * direct system calls that take a sigset_t pointer and a size.
  */
 #define SIGSET_T_SIZE (_NSIG / 8)
 
-/* This struct is used to hold certain information about the image.
+/*
+ * This struct is used to hold certain information about the image.
  * Basically, it replicates in user space what would be certain
  * task_struct fields in the kernel
  */
@@ -48,13 +50,13 @@ struct image_info {
 abi_ulong   env_strings;
 abi_ulong   file_string;
 uint32_telf_flags;
-intpersonality;
+int personality;
 abi_ulong   alignment;
 
 /* The fields below are used in FDPIC mode.  */
 abi_ulong   loadmap_addr;
 uint16_tnsegs;
-void   *loadsegs;
+void*loadsegs;
 abi_ulong   pt_dynamic_addr;
 abi_ulong   interpreter_loadmap_addr;
 abi_ulong   interpreter_pt_dynamic_addr;
@@ -98,8 +100,10 @@ struct emulated_sigtable {
 target_siginfo_t info;
 };
 
-/* NOTE: we force a big alignment so that the stack stored after is
-   aligned too */
+/*
+ * NOTE: we force a big alignment so that the stack stored after is
+ * aligned too
+ */
 typedef struct TaskState {
 pid_t ts_tid; /* tid (or pid) of this task */
 #ifdef TARGET_ARM
@@ -134,20 +138,23 @@ typedef struct TaskState {
 
 struct emulated_sigtable sync_signal;
 struct emulated_sigtable sigtab[TARGET_NSIG];
-/* This thread's signal mask, as requested by the guest program.
+/*
+ * This thread's signal mask, as requested by the guest program.
  * The actual signal mask of this thread may differ:
  *  + we don't let SIGSEGV and SIGBUS be blocked while running guest code
  *  + sometimes we block all signals to avoid races
  */
 sigset_t signal_mask;
-/* The signal mask imposed by a guest sigsuspend syscall, if we are
+/*
+ * The signal mask imposed by a guest sigsuspend syscall, if we are
  * currently in the middle of such a syscall
  */
 sigset_t sigsuspend_mask;
 /* Nonzero if we're leaving a sigsuspend and sigsuspend_mask is valid. */
 int in_sigsuspend;
 
-/* Nonzero if process_pending_signals() needs to do something (either
+/*
+ * Nonzero if process_pending_signals() needs to do something (either
  * handle a pending signal or unblock signals).
  * This flag is written from a signal handler so should be accessed via
  * the qatomic_read() and qatomic_set() functions. (It is not accessed
@@ -168,8 +175,10 @@ extern unsigned long mmap_min_addr;
 
 /* ??? See if we can avoid exposing so much of the loader internals.  */
 
-/* Read a good amount of data initially, to hopefully get all the
-   program headers loaded.  */
+/*
+ * Read a good amount of data initially, to hopefully get all the
+ * program headers loaded.
+ */
 #define BPRM_BUF_SIZE  1024
 
 /*
@@ -184,7 +193,7 @@ struct linux_binprm {
 int argc, envc;
 char **argv;
 char **envp;
-char * filename;/* Name of binary */
+char *filename;/* Name of binary */
 int (*core_dump)(int, const CPUArchState *); /* coredump routine */
 };
 
@@ -212,10 +221,11 @@ void do_init_thread(struct target_pt_regs *regs, struct 
image_info *infop);
 abi_ulong loader_build_argptr(int envc, int argc, abi_ulong sp,
   abi_ulong stringp, int push_ptr);
 int loader_exec(int fdexec, const char *filename, char **argv, char **envp,
- struct target_pt_regs * regs, struct image_info *infop,
+ struct target_pt_regs *regs, struct image_info *infop,
  struct linux_binprm *);
 
-/* Returns true if the image uses the FDPIC ABI. If this is the case,
+/*
+ * Returns true if the image uses the FDPIC ABI. If this is the case,
  * we have to provide some information (loadmap, pt_dynamic_info) such
  * that the program can be relocated adequately. This is also useful
  * when handling signals.
@@ -283,7 +293,8 @@ void probe_guest_base(const char *image_name,
  * with any of the host errno values.)
  */
 
-/* A guide to using safe_syscall()

[PULL 11/12] mac_via: rename VIA2_IRQ_SLOT_BIT to VIA2_IRQ_NUBUS_BIT

2021-09-08 Thread Laurent Vivier

From: Mark Cave-Ayland 

Also improve the alignment of the shifted constants.

Signed-off-by: Mark Cave-Ayland 
Reviewed-by: Laurent Vivier 
Message-Id: <20210830102447.10806-12-mark.cave-ayl...@ilande.co.uk>
Signed-off-by: Laurent Vivier 
---
 include/hw/misc/mac_via.h | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/hw/misc/mac_via.h b/include/hw/misc/mac_via.h
index 9a8bca056ef3..5168e3ce665c 100644
--- a/include/hw/misc/mac_via.h
+++ b/include/hw/misc/mac_via.h
@@ -26,11 +26,11 @@
 
 #define VIA1_IRQ_NB 8
 
-#define VIA1_IRQ_ONE_SECOND (1 << VIA1_IRQ_ONE_SECOND_BIT)
-#define VIA1_IRQ_60HZ   (1 << VIA1_IRQ_60HZ_BIT)
-#define VIA1_IRQ_ADB_READY  (1 << VIA1_IRQ_ADB_READY_BIT)
-#define VIA1_IRQ_ADB_DATA   (1 << VIA1_IRQ_ADB_DATA_BIT)
-#define VIA1_IRQ_ADB_CLOCK  (1 << VIA1_IRQ_ADB_CLOCK_BIT)
+#define VIA1_IRQ_ONE_SECOND (1 << VIA1_IRQ_ONE_SECOND_BIT)
+#define VIA1_IRQ_60HZ   (1 << VIA1_IRQ_60HZ_BIT)
+#define VIA1_IRQ_ADB_READY  (1 << VIA1_IRQ_ADB_READY_BIT)
+#define VIA1_IRQ_ADB_DATA   (1 << VIA1_IRQ_ADB_DATA_BIT)
+#define VIA1_IRQ_ADB_CLOCK  (1 << VIA1_IRQ_ADB_CLOCK_BIT)
 
 
 #define TYPE_MOS6522_Q800_VIA1 "mos6522-q800-via1"
@@ -80,18 +80,18 @@ struct MOS6522Q800VIA1State {
 
 /* VIA 2 */
 #define VIA2_IRQ_SCSI_DATA_BIT  0
-#define VIA2_IRQ_SLOT_BIT   1
+#define VIA2_IRQ_NUBUS_BIT  1
 #define VIA2_IRQ_UNUSED_BIT 2
 #define VIA2_IRQ_SCSI_BIT   3
 #define VIA2_IRQ_ASC_BIT4
 
 #define VIA2_IRQ_NB 8
 
-#define VIA2_IRQ_SCSI_DATA  (1 << VIA2_IRQ_SCSI_DATA_BIT)
-#define VIA2_IRQ_SLOT   (1 << VIA2_IRQ_SLOT_BIT)
-#define VIA2_IRQ_UNUSED (1 << VIA2_IRQ_SCSI_BIT)
-#define VIA2_IRQ_SCSI   (1 << VIA2_IRQ_UNUSED_BIT)
-#define VIA2_IRQ_ASC(1 << VIA2_IRQ_ASC_BIT)
+#define VIA2_IRQ_SCSI_DATA  (1 << VIA2_IRQ_SCSI_DATA_BIT)
+#define VIA2_IRQ_NUBUS  (1 << VIA2_IRQ_NUBUS_BIT)
+#define VIA2_IRQ_UNUSED (1 << VIA2_IRQ_SCSI_BIT)
+#define VIA2_IRQ_SCSI   (1 << VIA2_IRQ_UNUSED_BIT)
+#define VIA2_IRQ_ASC(1 << VIA2_IRQ_ASC_BIT)
 
 #define TYPE_MOS6522_Q800_VIA2 "mos6522-q800-via2"
 OBJECT_DECLARE_SIMPLE_TYPE(MOS6522Q800VIA2State, MOS6522_Q800_VIA2)
-- 
2.31.1

1 2 3 4 >

1 - 100 of 320 matches

Mail list logo