date:20220714

Re: [PATCH v2 09/19] vhost: Track number of descs in SVQElement

2022-07-14 Thread Eugenio Perez Martin

On Fri, Jul 15, 2022 at 6:10 AM Jason Wang  wrote:
>
> On Fri, Jul 15, 2022 at 12:32 AM Eugenio Pérez  wrote:
> >
> > Since CVQ will be able to modify elements, the number of descriptors in
> > the guest may not match with the number of descriptors exposed. Track
> > separately.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> >  hw/virtio/vhost-shadow-virtqueue.h |  6 ++
> >  hw/virtio/vhost-shadow-virtqueue.c | 10 +-
> >  2 files changed, 11 insertions(+), 5 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
> > b/hw/virtio/vhost-shadow-virtqueue.h
> > index f35d4b8f90..143c86a568 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.h
> > +++ b/hw/virtio/vhost-shadow-virtqueue.h
> > @@ -17,6 +17,12 @@
> >
> >  typedef struct SVQElement {
> >  VirtQueueElement elem;
> > +
> > +/*
> > + * Number of descriptors exposed to the device. May or may not match
> > + * guest's
> > + */
> > +unsigned int ndescs;
> >  } SVQElement;
>
> Can we simplify things further by moving ndescs into a dedicated array at
> svq?
>
> Then we don't need to bother with introducing SVQElement.
>

Yes, I'll move to a desc_state.

Thanks!

> Thanks
>
> >
> >  /* Shadow virtqueue to relay notifications */
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> > b/hw/virtio/vhost-shadow-virtqueue.c
> > index 442ca3cbd3..3b112c4ec8 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > @@ -243,10 +243,10 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, 
> > const struct iovec *out_sg,
> >size_t in_num, SVQElement *svq_elem)
> >  {
> >  unsigned qemu_head;
> > -unsigned ndescs = in_num + out_num;
> > +svq_elem->ndescs = in_num + out_num;
> >  bool ok;
> >
> > -if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
> > +if (unlikely(svq_elem->ndescs > vhost_svq_available_slots(svq))) {
> >  return -ENOSPC;
> >  }
> >
> > @@ -393,7 +393,7 @@ static SVQElement 
> > *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
> >  SVQElement *elem;
> >  const vring_used_t *used = svq->vring.used;
> >  vring_used_elem_t used_elem;
> > -uint16_t last_used, last_used_chain, num;
> > +uint16_t last_used, last_used_chain;
> >
> >  if (!vhost_svq_more_used(svq)) {
> >  return NULL;
> > @@ -420,8 +420,8 @@ static SVQElement 
> > *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
> >  }
> >
> >  elem = svq->ring_id_maps[used_elem.id];
> > -num = elem->elem.in_num + elem->elem.out_num;
> > -last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
> > +last_used_chain = vhost_svq_last_desc_of_chain(svq, elem->ndescs,
> > +   used_elem.id);
> >  svq->desc_next[last_used_chain] = svq->free_head;
> >  svq->free_head = used_elem.id;
> >
> > --
> > 2.31.1
> >
>

Re: [PATCH v2 18/19] vdpa: Add device migration blocker

2022-07-14 Thread Eugenio Perez Martin

On Fri, Jul 15, 2022 at 6:03 AM Jason Wang  wrote:
>
> On Fri, Jul 15, 2022 at 12:32 AM Eugenio Pérez  wrote:
> >
> > Since the vhost-vdpa device is exposing _F_LOG,
>
> I may miss something but I think it doesn't?
>

It's at vhost_vdpa_get_features. As long as SVQ is enabled, it's
exposing VHOST_F_LOG_ALL.

Thanks!

> Note that the features were fetched from the vDPA parent.
>
> Thanks
>
> > adding a migration blocker if
> > it uses CVQ.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> >  include/hw/virtio/vhost-vdpa.h |  1 +
> >  hw/virtio/vhost-vdpa.c | 14 ++
> >  2 files changed, 15 insertions(+)
> >
> > diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> > index d85643..d10a89303e 100644
> > --- a/include/hw/virtio/vhost-vdpa.h
> > +++ b/include/hw/virtio/vhost-vdpa.h
> > @@ -35,6 +35,7 @@ typedef struct vhost_vdpa {
> >  bool shadow_vqs_enabled;
> >  /* IOVA mapping used by the Shadow Virtqueue */
> >  VhostIOVATree *iova_tree;
> > +Error *migration_blocker;
> >  GPtrArray *shadow_vqs;
> >  const VhostShadowVirtqueueOps *shadow_vq_ops;
> >  void *shadow_vq_ops_opaque;
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index beaaa7049a..795ed5a049 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -20,6 +20,7 @@
> >  #include "hw/virtio/vhost-shadow-virtqueue.h"
> >  #include "hw/virtio/vhost-vdpa.h"
> >  #include "exec/address-spaces.h"
> > +#include "migration/blocker.h"
> >  #include "qemu/cutils.h"
> >  #include "qemu/main-loop.h"
> >  #include "cpu.h"
> > @@ -1022,6 +1023,13 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev 
> > *dev)
> >  return true;
> >  }
> >
> > +if (v->migration_blocker) {
> > +int r = migrate_add_blocker(v->migration_blocker, &err);
> > +if (unlikely(r < 0)) {
> > +goto err_migration_blocker;
> > +}
> > +}
> > +
> >  for (i = 0; i < v->shadow_vqs->len; ++i) {
> >  VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
> >  VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
> > @@ -1064,6 +1072,9 @@ err:
> >  vhost_svq_stop(svq);
> >  }
> >
> > +err_migration_blocker:
> > +error_reportf_err(err, "Cannot setup SVQ %u: ", i);
> > +
> >  return false;
> >  }
> >
> > @@ -1083,6 +1094,9 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev 
> > *dev)
> >  }
> >  }
> >
> > +if (v->migration_blocker) {
> > +migrate_del_blocker(v->migration_blocker);
> > +}
> >  return true;
> >  }
> >
> > --
> > 2.31.1
> >
>

Re: [PATCH v2 12/19] vhost: add vhost_svq_poll

2022-07-14 Thread Eugenio Perez Martin

On Fri, Jul 15, 2022 at 5:59 AM Jason Wang  wrote:
>
> On Fri, Jul 15, 2022 at 12:32 AM Eugenio Pérez  wrote:
> >
> > It allows the Shadow Control VirtQueue to wait for the device to use the
> > available buffers.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> >  hw/virtio/vhost-shadow-virtqueue.h |  1 +
> >  hw/virtio/vhost-shadow-virtqueue.c | 22 ++
> >  2 files changed, 23 insertions(+)
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
> > b/hw/virtio/vhost-shadow-virtqueue.h
> > index 1692541cbb..b5c6e3b3b4 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.h
> > +++ b/hw/virtio/vhost-shadow-virtqueue.h
> > @@ -89,6 +89,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq, const 
> > SVQElement *elem,
> >  int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
> >size_t out_num, const struct iovec *in_sg, size_t in_num,
> >SVQElement *elem);
> > +size_t vhost_svq_poll(VhostShadowVirtqueue *svq);
> >
> >  void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
> >  void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> > b/hw/virtio/vhost-shadow-virtqueue.c
> > index 5244896358..31a267f721 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > @@ -486,6 +486,28 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
> >  } while (!vhost_svq_enable_notification(svq));
> >  }
> >
> > +/**
> > + * Poll the SVQ for one device used buffer.
> > + *
> > + * This function race with main event loop SVQ polling, so extra
> > + * synchronization is needed.
> > + *
> > + * Return the length written by the device.
> > + */
> > +size_t vhost_svq_poll(VhostShadowVirtqueue *svq)
> > +{
> > +do {
> > +uint32_t len;
> > +SVQElement *elem = vhost_svq_get_buf(svq, &len);
> > +if (elem) {
> > +return len;
> > +}
> > +
> > +/* Make sure we read new used_idx */
> > +smp_rmb();
>
> There's already one smp_rmb() in vhost_svq_get_buf(). So this seems useless?
>

That rmb comes after checking for new entries with (vq->last_used_idx !=
svq->shadow_used_idx), to avoid reordering the used_idx read with the read
of the actual used entry. So my understanding is that the compiler is free
to skip that check within the while loop.

Maybe the right solution is to add it in vhost_svq_more_used after the
condition (vq->last_used_idx != svq->shadow_used_idx) is false?

Thanks!


> Thanks
>
> > +} while (true);
> > +}
> > +
> >  /**
> >   * Forward used buffers.
> >   *
> > --
> > 2.31.1
> >
>

Re: [PATCH v2 15/19] vdpa: manual forward CVQ buffers

2022-07-14 Thread Eugenio Perez Martin

On Fri, Jul 15, 2022 at 6:08 AM Jason Wang  wrote:
>
> On Fri, Jul 15, 2022 at 12:32 AM Eugenio Pérez  wrote:
> >
> > Do a simple forwarding of CVQ buffers, the same work SVQ could do but
> > through callbacks. No functional change intended.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> >  include/hw/virtio/vhost-vdpa.h |  3 ++
> >  hw/virtio/vhost-vdpa.c |  3 +-
> >  net/vhost-vdpa.c   | 58 ++
> >  3 files changed, 63 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> > index 7214eb47dc..d85643 100644
> > --- a/include/hw/virtio/vhost-vdpa.h
> > +++ b/include/hw/virtio/vhost-vdpa.h
> > @@ -15,6 +15,7 @@
> >  #include 
> >
> >  #include "hw/virtio/vhost-iova-tree.h"
> > +#include "hw/virtio/vhost-shadow-virtqueue.h"
> >  #include "hw/virtio/virtio.h"
> >  #include "standard-headers/linux/vhost_types.h"
> >
> > @@ -35,6 +36,8 @@ typedef struct vhost_vdpa {
> >  /* IOVA mapping used by the Shadow Virtqueue */
> >  VhostIOVATree *iova_tree;
> >  GPtrArray *shadow_vqs;
> > +const VhostShadowVirtqueueOps *shadow_vq_ops;
> > +void *shadow_vq_ops_opaque;
> >  struct vhost_dev *dev;
> >  VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
> >  } VhostVDPA;
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index 96997210be..beaaa7049a 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -419,7 +419,8 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
> > struct vhost_vdpa *v,
> >  for (unsigned n = 0; n < hdev->nvqs; ++n) {
> >  g_autoptr(VhostShadowVirtqueue) svq;
> >
> > -svq = vhost_svq_new(v->iova_tree, NULL, NULL);
> > +svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
> > +v->shadow_vq_ops_opaque);
> >  if (unlikely(!svq)) {
> >  error_setg(errp, "Cannot create svq %u", n);
> >  return -1;
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index df1e69ee72..805c9dd6b6 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -11,11 +11,14 @@
> >
> >  #include "qemu/osdep.h"
> >  #include "clients.h"
> > +#include "hw/virtio/virtio-net.h"
> >  #include "net/vhost_net.h"
> >  #include "net/vhost-vdpa.h"
> >  #include "hw/virtio/vhost-vdpa.h"
> >  #include "qemu/config-file.h"
> >  #include "qemu/error-report.h"
> > +#include "qemu/log.h"
> > +#include "qemu/memalign.h"
> >  #include "qemu/option.h"
> >  #include "qapi/error.h"
> >  #include 
> > @@ -187,6 +190,57 @@ static NetClientInfo net_vhost_vdpa_info = {
> >  .check_peer_type = vhost_vdpa_check_peer_type,
> >  };
> >
> > +/**
> > + * Forward buffer for the moment.
> > + */
> > +static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
> > +SVQElement *svq_elem, void 
> > *opaque)
> > +{
> > +VirtQueueElement *elem = &svq_elem->elem;
> > +unsigned int n = elem->out_num + elem->in_num;
> > +g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
> > +size_t in_len, dev_written;
> > +virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> > +int r;
> > +
> > +memcpy(dev_buffers, elem->out_sg, elem->out_num);
> > +memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
> > +
> > +r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1],
> > +  elem->in_num, svq_elem);
> > +if (unlikely(r != 0)) {
> > +if (unlikely(r == -ENOSPC)) {
> > +qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device 
> > queue\n",
> > +  __func__);
> > +}
> > +goto out;
> > +}
> > +
> > +/*
> > + * We can poll here since we've had BQL from the time we sent the
> > + * descriptor. Also, we need to take the answer before SVQ pulls by 
> > itself,
> > + * when BQL is released
> > + */
> > +dev_written = vhost_svq_poll(svq);
> > +if (unlikely(dev_written < sizeof(status))) {
> > +error_report("Insufficient written data (%zu)", dev_written);
> > +}
> > +
> > +out:
> > +in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
> > +  sizeof(status));
> > +if (unlikely(in_len < sizeof(status))) {
> > +error_report("Bad device CVQ written length");
> > +}
> > +vhost_svq_push_elem(svq, svq_elem, MIN(in_len, sizeof(status)));
> > +g_free(svq_elem);
> > +return r;
> > +}
> > +
> > +static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
> > +.avail_handler = vhost_vdpa_net_handle_ctrl_avail,
> > +};
>
> I wonder if it's possible to even remove this handler. Can we let the
> kick to be handled by virtio_net_handler_ctrl() in virtio-net.c?
>

I kind of drafted that here:
https://lists.nongnu.org/archive/html/qemu-devel/2022-07/msg02652.html

But I'm not sure about the part of not enabling the guest

Re: [PATCH v2 07/11] acpi/tests/bits: add python test that exercizes QEMU bios tables using biosbits

2022-07-14 Thread Ani Sinha




On Thu, 14 Jul 2022, Michael S. Tsirkin wrote:

> On Sun, Jul 10, 2022 at 10:30:10PM +0530, Ani Sinha wrote:
> > This change adds python based test environment that can be used to run 
> > pytest
> > from within a virtual environment. A bash script sets up a virtual 
> > environment
> > and then runs the python based tests from within that environment.
> > All dependent python packages are installed in the virtual environment using
> > pip python module. QEMU python test modules are also available in the 
> > environment
> > for spawning the QEMU based VMs.
> >
> > It also introduces QEMU acpi/smbios biosbits python test script which is run
> > from within the python virtual environment. When the bios bits tests are 
> > run,
> > bios bits binaries are downloaded from an external repo/location.
> > Currently, the test points to an external private github repo where the bits
> > archives are checked in.
> >
> > Signed-off-by: Ani Sinha 
> > ---
> >  tests/pytest/acpi-bits/acpi-bits-test-venv.sh |  59 +++
> >  tests/pytest/acpi-bits/acpi-bits-test.py  | 382 ++
> >  tests/pytest/acpi-bits/meson.build|  33 ++
> >  tests/pytest/acpi-bits/requirements.txt   |   1 +
> >  4 files changed, 475 insertions(+)
> >  create mode 100644 tests/pytest/acpi-bits/acpi-bits-test-venv.sh
> >  create mode 100644 tests/pytest/acpi-bits/acpi-bits-test.py
> >  create mode 100644 tests/pytest/acpi-bits/meson.build
> >  create mode 100644 tests/pytest/acpi-bits/requirements.txt
> >
> > diff --git a/tests/pytest/acpi-bits/acpi-bits-test-venv.sh 
> > b/tests/pytest/acpi-bits/acpi-bits-test-venv.sh
> > new file mode 100644
> > index 00..186395473b
> > --- /dev/null
> > +++ b/tests/pytest/acpi-bits/acpi-bits-test-venv.sh
> > @@ -0,0 +1,59 @@
> > +#!/usr/bin/env bash
> > +# Generates a python virtual environment for the test to run.
> > +# Then runs python test scripts from within that virtual environment.
> > +#
> > +# This program is free software; you can redistribute it and/or modify
> > +# it under the terms of the GNU General Public License as published by
> > +# the Free Software Foundation; either version 2 of the License, or
> > +# (at your option) any later version.
> > +#
> > +# This program is distributed in the hope that it will be useful,
> > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > +# GNU General Public License for more details.
> > +#
> > +# You should have received a copy of the GNU General Public License
> > +# along with this program.  If not, see .
> > +#
> > +# Author: Ani Sinha 
> > +
> > +set -e
> > +
> > +MYPATH=$(realpath ${BASH_SOURCE:-$0})
> > +MYDIR=$(dirname $MYPATH)
> > +
> > +if [ -z "$PYTEST_SOURCE_ROOT" ]; then
> > +echo -n "Please set QTEST_SOURCE_ROOT env pointing"
> > +echo " to the root of the qemu source tree."
> > +echo -n "This is required so that the test can find the "
> > +echo "python modules that it needs for execution."
> > +exit 1
> > +fi
> > +SRCDIR=$PYTEST_SOURCE_ROOT
> > +TESTSCRIPTS=("acpi-bits-test.py")
> > +PIPCMD="-m pip -q --disable-pip-version-check"
> > +# we need to save the old value of PWD before we do a change-dir later
> > +PYTEST_PWD=$PWD
> > +
> > +TESTS_PYTHON=/usr/bin/python3
> > +TESTS_VENV_REQ=requirements.txt
> > +
> > +# sadly for pip -e and -t options do not work together.
> > +# please see https://github.com/pypa/pip/issues/562
> > +cd $MYDIR
> > +
> > +$TESTS_PYTHON -m venv .
> > +$TESTS_PYTHON $PIPCMD install -e $SRCDIR/python/
> > +[ -f $TESTS_VENV_REQ ] && \
> > +$TESTS_PYTHON $PIPCMD install -r $TESTS_VENV_REQ || exit 0
> > +
> > +# venv is activated at this point.
> > +
> > +# run the test
> > +for testscript in ${TESTSCRIPTS[@]} ; do
> > +export PYTEST_PWD; python3 $testscript
> > +done
> > +
> > +cd $PYTEST_PWD
> > +
> > +exit 0
> > diff --git a/tests/pytest/acpi-bits/acpi-bits-test.py 
> > b/tests/pytest/acpi-bits/acpi-bits-test.py
> > new file mode 100644
> > index 00..97e61eb709
> > --- /dev/null
> > +++ b/tests/pytest/acpi-bits/acpi-bits-test.py
> > @@ -0,0 +1,382 @@
> > +#!/usr/bin/env python3
> > +# group: rw quick
> > +# Exercize QEMU generated ACPI/SMBIOS tables using biosbits,
> > +# https://biosbits.org/
> > +#
> > +# This program is free software; you can redistribute it and/or modify
> > +# it under the terms of the GNU General Public License as published by
> > +# the Free Software Foundation; either version 2 of the License, or
> > +# (at your option) any later version.
> > +#
> > +# This program is distributed in the hope that it will be useful,
> > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > +# GNU General Public License for more details.
> > +#
> > +# You should have received a copy of the GNU General Public License
> > +# along with this

Re: [PATCH v2 19/19] vdpa: Add x-svq to NetdevVhostVDPAOptions

2022-07-14 Thread Jason Wang

On Fri, Jul 15, 2022 at 12:32 AM Eugenio Pérez  wrote:
>
> Finally offering the possibility to enable SVQ from the command line.
>
> Signed-off-by: Eugenio Pérez 
> Acked-by: Markus Armbruster 
> ---
>  qapi/net.json|  9 +-
>  net/vhost-vdpa.c | 72 ++--
>  2 files changed, 77 insertions(+), 4 deletions(-)
>
> diff --git a/qapi/net.json b/qapi/net.json
> index 9af11e9a3b..75ba2cb989 100644
> --- a/qapi/net.json
> +++ b/qapi/net.json
> @@ -445,12 +445,19 @@
>  # @queues: number of queues to be created for multiqueue vhost-vdpa
>  #  (default: 1)
>  #
> +# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1)
> +# (default: false)
> +#
> +# Features:
> +# @unstable: Member @x-svq is experimental.
> +#
>  # Since: 5.1
>  ##
>  { 'struct': 'NetdevVhostVDPAOptions',
>'data': {
>  '*vhostdev': 'str',
> -'*queues':   'int' } }
> +'*queues':   'int',
> +'*x-svq':{'type': 'bool', 'features' : [ 'unstable'] } } }
>
>  ##
>  # @NetdevVmnetHostOptions:
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 7ccf9eaf4d..85148a5114 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -75,6 +75,28 @@ const int vdpa_feature_bits[] = {
>  VHOST_INVALID_FEATURE_BIT
>  };
>
> +/** Supported device specific feature bits with SVQ */
> +static const uint64_t vdpa_svq_device_features =
> +BIT_ULL(VIRTIO_NET_F_CSUM) |
> +BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
> +BIT_ULL(VIRTIO_NET_F_MTU) |
> +BIT_ULL(VIRTIO_NET_F_MAC) |
> +BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
> +BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
> +BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
> +BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
> +BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
> +BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
> +BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
> +BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
> +BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
> +BIT_ULL(VIRTIO_NET_F_STATUS) |
> +BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
> +BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
> +BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
> +BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
> +BIT_ULL(VIRTIO_NET_F_STANDBY);

We need to have a plan for full feature support, like indirect,
event_index, and packed.

I can help in developing some of these if you wish.

Thanks

> +
>  VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
>  {
>  VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> @@ -133,9 +155,13 @@ err_init:
>  static void vhost_vdpa_cleanup(NetClientState *nc)
>  {
>  VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> +struct vhost_dev *dev = &s->vhost_net->dev;
>
>  qemu_vfree(s->cvq_cmd_out_buffer);
>  qemu_vfree(s->cvq_cmd_in_buffer);
> +if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
> +g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
> +}
>  if (s->vhost_net) {
>  vhost_net_cleanup(s->vhost_net);
>  g_free(s->vhost_net);
> @@ -437,7 +463,9 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
> *peer,
> int vdpa_device_fd,
> int queue_pair_index,
> int nvqs,
> -   bool is_datapath)
> +   bool is_datapath,
> +   bool svq,
> +   VhostIOVATree *iova_tree)
>  {
>  NetClientState *nc = NULL;
>  VhostVDPAState *s;
> @@ -455,6 +483,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
> *peer,
>
>  s->vhost_vdpa.device_fd = vdpa_device_fd;
>  s->vhost_vdpa.index = queue_pair_index;
> +s->vhost_vdpa.shadow_vqs_enabled = svq;
> +s->vhost_vdpa.iova_tree = iova_tree;
>  if (!is_datapath) {
>  s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
>  
> vhost_vdpa_net_cvq_cmd_page_len());
> @@ -465,6 +495,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
> *peer,
>
>  s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
>  s->vhost_vdpa.shadow_vq_ops_opaque = s;
> +error_setg(&s->vhost_vdpa.migration_blocker,
> +   "Migration disabled: vhost-vdpa uses CVQ.");
>  }
>  ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
>  if (ret) {
> @@ -474,6 +506,14 @@ static NetClientState 
> *net_vhost_vdpa_init(NetClientState *peer,
>  return nc;
>  }
>
> +static int vhost_vdpa_get_iova_range(int fd,
> + struct vhost_vdpa_iova_range 
> *iova_range)
> +{
> +int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
> +
> +return ret < 0 ? -errno : 0;
> +}
> +
>  static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
>  {
>  int ret = ioctl(fd,

Re: [PATCH v2 09/19] vhost: Track number of descs in SVQElement

2022-07-14 Thread Jason Wang

On Fri, Jul 15, 2022 at 12:32 AM Eugenio Pérez  wrote:
>
> Since CVQ will be able to modify elements, the number of descriptors in
> the guest may not match with the number of descriptors exposed. Track
> separately.
>
> Signed-off-by: Eugenio Pérez 
> ---
>  hw/virtio/vhost-shadow-virtqueue.h |  6 ++
>  hw/virtio/vhost-shadow-virtqueue.c | 10 +-
>  2 files changed, 11 insertions(+), 5 deletions(-)
>
> diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
> b/hw/virtio/vhost-shadow-virtqueue.h
> index f35d4b8f90..143c86a568 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.h
> +++ b/hw/virtio/vhost-shadow-virtqueue.h
> @@ -17,6 +17,12 @@
>
>  typedef struct SVQElement {
>  VirtQueueElement elem;
> +
> +/*
> + * Number of descriptors exposed to the device. May or may not match
> + * guest's
> + */
> +unsigned int ndescs;
>  } SVQElement;

Can we simplify things further by moving ndescs into a dedicated array at svq?

Then we don't need to bother with introducing SVQElement.

Thanks

>
>  /* Shadow virtqueue to relay notifications */
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> b/hw/virtio/vhost-shadow-virtqueue.c
> index 442ca3cbd3..3b112c4ec8 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -243,10 +243,10 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, 
> const struct iovec *out_sg,
>size_t in_num, SVQElement *svq_elem)
>  {
>  unsigned qemu_head;
> -unsigned ndescs = in_num + out_num;
> +svq_elem->ndescs = in_num + out_num;
>  bool ok;
>
> -if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
> +if (unlikely(svq_elem->ndescs > vhost_svq_available_slots(svq))) {
>  return -ENOSPC;
>  }
>
> @@ -393,7 +393,7 @@ static SVQElement *vhost_svq_get_buf(VhostShadowVirtqueue 
> *svq,
>  SVQElement *elem;
>  const vring_used_t *used = svq->vring.used;
>  vring_used_elem_t used_elem;
> -uint16_t last_used, last_used_chain, num;
> +uint16_t last_used, last_used_chain;
>
>  if (!vhost_svq_more_used(svq)) {
>  return NULL;
> @@ -420,8 +420,8 @@ static SVQElement *vhost_svq_get_buf(VhostShadowVirtqueue 
> *svq,
>  }
>
>  elem = svq->ring_id_maps[used_elem.id];
> -num = elem->elem.in_num + elem->elem.out_num;
> -last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
> +last_used_chain = vhost_svq_last_desc_of_chain(svq, elem->ndescs,
> +   used_elem.id);
>  svq->desc_next[last_used_chain] = svq->free_head;
>  svq->free_head = used_elem.id;
>
> --
> 2.31.1
>

Re: [PATCH v2 15/19] vdpa: manual forward CVQ buffers

2022-07-14 Thread Jason Wang

On Fri, Jul 15, 2022 at 12:32 AM Eugenio Pérez  wrote:
>
> Do a simple forwarding of CVQ buffers, the same work SVQ could do but
> through callbacks. No functional change intended.
>
> Signed-off-by: Eugenio Pérez 
> ---
>  include/hw/virtio/vhost-vdpa.h |  3 ++
>  hw/virtio/vhost-vdpa.c |  3 +-
>  net/vhost-vdpa.c   | 58 ++
>  3 files changed, 63 insertions(+), 1 deletion(-)
>
> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> index 7214eb47dc..d85643 100644
> --- a/include/hw/virtio/vhost-vdpa.h
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -15,6 +15,7 @@
>  #include 
>
>  #include "hw/virtio/vhost-iova-tree.h"
> +#include "hw/virtio/vhost-shadow-virtqueue.h"
>  #include "hw/virtio/virtio.h"
>  #include "standard-headers/linux/vhost_types.h"
>
> @@ -35,6 +36,8 @@ typedef struct vhost_vdpa {
>  /* IOVA mapping used by the Shadow Virtqueue */
>  VhostIOVATree *iova_tree;
>  GPtrArray *shadow_vqs;
> +const VhostShadowVirtqueueOps *shadow_vq_ops;
> +void *shadow_vq_ops_opaque;
>  struct vhost_dev *dev;
>  VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
>  } VhostVDPA;
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 96997210be..beaaa7049a 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -419,7 +419,8 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
> struct vhost_vdpa *v,
>  for (unsigned n = 0; n < hdev->nvqs; ++n) {
>  g_autoptr(VhostShadowVirtqueue) svq;
>
> -svq = vhost_svq_new(v->iova_tree, NULL, NULL);
> +svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
> +v->shadow_vq_ops_opaque);
>  if (unlikely(!svq)) {
>  error_setg(errp, "Cannot create svq %u", n);
>  return -1;
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index df1e69ee72..805c9dd6b6 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -11,11 +11,14 @@
>
>  #include "qemu/osdep.h"
>  #include "clients.h"
> +#include "hw/virtio/virtio-net.h"
>  #include "net/vhost_net.h"
>  #include "net/vhost-vdpa.h"
>  #include "hw/virtio/vhost-vdpa.h"
>  #include "qemu/config-file.h"
>  #include "qemu/error-report.h"
> +#include "qemu/log.h"
> +#include "qemu/memalign.h"
>  #include "qemu/option.h"
>  #include "qapi/error.h"
>  #include 
> @@ -187,6 +190,57 @@ static NetClientInfo net_vhost_vdpa_info = {
>  .check_peer_type = vhost_vdpa_check_peer_type,
>  };
>
> +/**
> + * Forward buffer for the moment.
> + */
> +static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
> +SVQElement *svq_elem, void 
> *opaque)
> +{
> +VirtQueueElement *elem = &svq_elem->elem;
> +unsigned int n = elem->out_num + elem->in_num;
> +g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
> +size_t in_len, dev_written;
> +virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> +int r;
> +
> +memcpy(dev_buffers, elem->out_sg, elem->out_num);
> +memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
> +
> +r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1],
> +  elem->in_num, svq_elem);
> +if (unlikely(r != 0)) {
> +if (unlikely(r == -ENOSPC)) {
> +qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
> +  __func__);
> +}
> +goto out;
> +}
> +
> +/*
> + * We can poll here since we've had BQL from the time we sent the
> + * descriptor. Also, we need to take the answer before SVQ pulls by 
> itself,
> + * when BQL is released
> + */
> +dev_written = vhost_svq_poll(svq);
> +if (unlikely(dev_written < sizeof(status))) {
> +error_report("Insufficient written data (%zu)", dev_written);
> +}
> +
> +out:
> +in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
> +  sizeof(status));
> +if (unlikely(in_len < sizeof(status))) {
> +error_report("Bad device CVQ written length");
> +}
> +vhost_svq_push_elem(svq, svq_elem, MIN(in_len, sizeof(status)));
> +g_free(svq_elem);
> +return r;
> +}
> +
> +static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
> +.avail_handler = vhost_vdpa_net_handle_ctrl_avail,
> +};

I wonder if it's possible to even remove this handler. Can we let the
kick to be handled by virtio_net_handler_ctrl() in virtio-net.c?

Thanks

> +
>  static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
> const char *device,
> const char *name,
> @@ -211,6 +265,10 @@ static NetClientState 
> *net_vhost_vdpa_init(NetClientState *peer,
>
>  s->vhost_vdpa.device_fd = vdpa_device_fd;
>  s->vhost_vdpa.index = queue_pair_index;
> +if (!is_datapath) {
> +s->vhost_vdpa.shadow_vq_ops =

Re: [PATCH v2 18/19] vdpa: Add device migration blocker

2022-07-14 Thread Jason Wang

On Fri, Jul 15, 2022 at 12:32 AM Eugenio Pérez  wrote:
>
> Since the vhost-vdpa device is exposing _F_LOG,

I may miss something but I think it doesn't?

Note that the features were fetched from the vDPA parent.

Thanks

> adding a migration blocker if
> it uses CVQ.
>
> Signed-off-by: Eugenio Pérez 
> ---
>  include/hw/virtio/vhost-vdpa.h |  1 +
>  hw/virtio/vhost-vdpa.c | 14 ++
>  2 files changed, 15 insertions(+)
>
> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> index d85643..d10a89303e 100644
> --- a/include/hw/virtio/vhost-vdpa.h
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -35,6 +35,7 @@ typedef struct vhost_vdpa {
>  bool shadow_vqs_enabled;
>  /* IOVA mapping used by the Shadow Virtqueue */
>  VhostIOVATree *iova_tree;
> +Error *migration_blocker;
>  GPtrArray *shadow_vqs;
>  const VhostShadowVirtqueueOps *shadow_vq_ops;
>  void *shadow_vq_ops_opaque;
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index beaaa7049a..795ed5a049 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -20,6 +20,7 @@
>  #include "hw/virtio/vhost-shadow-virtqueue.h"
>  #include "hw/virtio/vhost-vdpa.h"
>  #include "exec/address-spaces.h"
> +#include "migration/blocker.h"
>  #include "qemu/cutils.h"
>  #include "qemu/main-loop.h"
>  #include "cpu.h"
> @@ -1022,6 +1023,13 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev 
> *dev)
>  return true;
>  }
>
> +if (v->migration_blocker) {
> +int r = migrate_add_blocker(v->migration_blocker, &err);
> +if (unlikely(r < 0)) {
> +goto err_migration_blocker;
> +}
> +}
> +
>  for (i = 0; i < v->shadow_vqs->len; ++i) {
>  VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
>  VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
> @@ -1064,6 +1072,9 @@ err:
>  vhost_svq_stop(svq);
>  }
>
> +err_migration_blocker:
> +error_reportf_err(err, "Cannot setup SVQ %u: ", i);
> +
>  return false;
>  }
>
> @@ -1083,6 +1094,9 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
>  }
>  }
>
> +if (v->migration_blocker) {
> +migrate_del_blocker(v->migration_blocker);
> +}
>  return true;
>  }
>
> --
> 2.31.1
>

Re: [PATCH v2 12/19] vhost: add vhost_svq_poll

2022-07-14 Thread Jason Wang

On Fri, Jul 15, 2022 at 12:32 AM Eugenio Pérez  wrote:
>
> It allows the Shadow Control VirtQueue to wait for the device to use the
> available buffers.
>
> Signed-off-by: Eugenio Pérez 
> ---
>  hw/virtio/vhost-shadow-virtqueue.h |  1 +
>  hw/virtio/vhost-shadow-virtqueue.c | 22 ++
>  2 files changed, 23 insertions(+)
>
> diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
> b/hw/virtio/vhost-shadow-virtqueue.h
> index 1692541cbb..b5c6e3b3b4 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.h
> +++ b/hw/virtio/vhost-shadow-virtqueue.h
> @@ -89,6 +89,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq, const 
> SVQElement *elem,
>  int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
>size_t out_num, const struct iovec *in_sg, size_t in_num,
>SVQElement *elem);
> +size_t vhost_svq_poll(VhostShadowVirtqueue *svq);
>
>  void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
>  void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> b/hw/virtio/vhost-shadow-virtqueue.c
> index 5244896358..31a267f721 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -486,6 +486,28 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
>  } while (!vhost_svq_enable_notification(svq));
>  }
>
> +/**
> + * Poll the SVQ for one device used buffer.
> + *
> + * This function race with main event loop SVQ polling, so extra
> + * synchronization is needed.
> + *
> + * Return the length written by the device.
> + */
> +size_t vhost_svq_poll(VhostShadowVirtqueue *svq)
> +{
> +do {
> +uint32_t len;
> +SVQElement *elem = vhost_svq_get_buf(svq, &len);
> +if (elem) {
> +return len;
> +}
> +
> +/* Make sure we read new used_idx */
> +smp_rmb();

There's already one smp_rmb() in vhost_svq_get_buf(). So this seems useless?

Thanks

> +} while (true);
> +}
> +
>  /**
>   * Forward used buffers.
>   *
> --
> 2.31.1
>

Re: [PATCH 1/8] hmat acpi: Don't require initiator value in -numa

2022-07-14 Thread Liu, Jingqi


Hello,

On 7/11/2022 6:44 PM, Hesham Almatary via wrote:

From: Brice Goglin

The "Memory Proximity Domain Attributes" structure of the ACPI HMAT
has a "Processor Proximity Domain Valid" flag that is currently
always set because Qemu -numa requires an initiator=X value
when hmat=on. Unsetting this flag allows to create more complex
memory topologies by having multiple best initiators for a single
memory target.


It would be better if you can explicitly provide cases in this description
for creating more complex memory topologies without initiator=X.

Thanks,
Jingqi


This patch allows -numa without initiator=X when hmat=on by keeping
the default value MAX_NODES in numa_state->nodes[i].initiator.
All places reading numa_state->nodes[i].initiator already check
whether it's different from MAX_NODES before using it.

Tested with
qemu-system-x86_64 -accel kvm \
  -machine pc,hmat=on \
  -drive if=pflash,format=raw,file=./OVMF.fd \
  -drive media=disk,format=qcow2,file=efi.qcow2 \
  -smp 4 \
  -m 3G \
  -object memory-backend-ram,size=1G,id=ram0 \
  -object memory-backend-ram,size=1G,id=ram1 \
  -object memory-backend-ram,size=1G,id=ram2 \
  -numa node,nodeid=0,memdev=ram0,cpus=0-1 \
  -numa node,nodeid=1,memdev=ram1,cpus=2-3 \
  -numa node,nodeid=2,memdev=ram2 \
  -numa 
hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=10
 \
  -numa 
hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=10485760
 \
  -numa 
hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=20
 \
  -numa 
hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=5242880
 \
  -numa 
hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-latency,latency=30
 \
  -numa 
hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-bandwidth,bandwidth=1048576
 \
  -numa 
hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-latency,latency=20
 \
  -numa 
hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=5242880
 \
  -numa 
hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-latency,latency=10
 \
  -numa 
hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=10485760
 \
  -numa 
hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-latency,latency=30
 \
  -numa 
hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-bandwidth,bandwidth=1048576
which reports NUMA node2 at same distance from both node0 and node1 as seen in 
lstopo:
Machine (2966MB total) + Package P#0
   NUMANode P#2 (979MB)
   Group0
 NUMANode P#0 (980MB)
 Core P#0 + PU P#0
 Core P#1 + PU P#1
   Group0
 NUMANode P#1 (1007MB)
 Core P#2 + PU P#2
 Core P#3 + PU P#3

Before this patch, we had to add ",initiator=X" to "-numa 
node,nodeid=2,memdev=ram2".
The lstopo output difference between initiator=1 and no initiator is:
@@ -1,10 +1,10 @@
  Machine (2966MB total) + Package P#0
+  NUMANode P#2 (979MB)
Group0
  NUMANode P#0 (980MB)
  Core P#0 + PU P#0
  Core P#1 + PU P#1
Group0
  NUMANode P#1 (1007MB)
-NUMANode P#2 (979MB)
  Core P#2 + PU P#2
  Core P#3 + PU P#3

Corresponding changes in the HMAT MPDA structure:
@@ -49,10 +49,10 @@
  [078h 0120   2]   Structure Type :  [Memory Proximity Domain 
Attributes]
  [07Ah 0122   2] Reserved : 
  [07Ch 0124   4]   Length : 0028
-[080h 0128   2]Flags (decoded below) : 0001
-Processor Proximity Domain Valid : 1
+[080h 0128   2]Flags (decoded below) : 
+Processor Proximity Domain Valid : 0
  [082h 0130   2]Reserved1 : 
-[084h 0132   4] Attached Initiator Proximity Domain : 0001
+[084h 0132   4] Attached Initiator Proximity Domain : 0080
  [088h 0136   4]  Memory Proximity Domain : 0002
  [08Ch 0140   4]Reserved2 : 
  [090h 0144   8]Reserved3 : 

Final HMAT SLLB structures:
[0A0h 0160   2]   Structure Type : 0001 [System Locality Latency 
and Bandwidth Information]
[0A2h 0162   2] Reserved : 
[0A4h 0164   4]   Length : 0040
[0A8h 0168   1]Flags (decoded below) : 00
 Memory Hierarchy : 0
[0A9h 0169   1]Data Type : 00
[0AAh 0170   2]Reserved1 : 
[0ACh 0172   4] Initiator Proximity Domains # : 0002
[0B0h 0176   4]   Target Proximity Domains # : 0003
[0B4h 0180   4]Reserved2 : 
[0B8h 0184   8]  Entry Base Unit : 2710
[0C0h 0192   4] Initiator Proximity Domain List : 
[0C4h 0196   4] Initiator Proximity Domain List : 0001
[0C8h 0200   4] Target Proximity Domain List : 
[0CCh 0204   4] Target Proximity Domain List : 0001
[0D0h 0208   4] Target Proximity

[RFC] aspeed/i2c: multi-master between SoC's

2022-07-14 Thread Peter Delevoryas

Hey Cedric, Klaus, and Corey,

So I realized something about the current state of multi-master i2c:

We can't do transfers between two Aspeed I2C controllers, e.g.  AST1030 <->
AST2600. I'm looking into this case in the new fby35 machine (which isn't even
merged yet, just in Cedric's pull request)

This is because the AspeedI2CBusSlave is only designed to receive through
i2c_send_async(). But the AspeedI2CBus master-mode transfers use i2c_send().

So, the AST2600 can't send data to the AST1030. And the AST1030 can't reply to
the AST2600.

(By the way, another small issue: AspeedI2CBusSlave expects the parent of its
parent to be its AspeedI2CBus, but that's not true if multiple SoC's are sharing
an I2CBus. But that's easy to resolve, I'll send a patch for that soon).

I'm wondering how best to resolve the multi-SoC send-async issue, while
retaining the ability to send synchronously to non-SoC slave devices.

I think there's only one way, as far as I can see:

- Force the Aspeed I2C Controller to master the I2C bus before starting a master
  transfer. Even for synchronous transfers.

This shouldn't be a big problem, we can still do synchronous transfers, we just
have to wait for the bus to be free before starting the transfer.

- If the I2C slave targets for a master2slave transfer support async_send, then
  use async_send. This requires refactoring aspeed_i2c_bus_send into a state
  machine to send data asynchronously.

In other words, don't try to do a synchronous transfer to an SoC.

But, of course, we can keep doing synchronous transfers from SoC -> sensor or
sensor -> SoC.

I see the code in aspeed_i2c_bus_send turning into something like this:

diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c
index 42c6d69b82..1ea530a77e 100644
--- a/hw/i2c/aspeed_i2c.c
+++ b/hw/i2c/aspeed_i2c.c
@@ -226,10 +226,17 @@ static int aspeed_i2c_dma_read(AspeedI2CBus *bus, uint8_t 
*data)
 return 0;
 }
 
-static int aspeed_i2c_bus_send(AspeedI2CBus *bus, uint8_t pool_start)
+typedef enum AsyncResult AsyncResult;
+enum AsyncResult {
+DONE,
+YIELD,
+ERROR,
+};
+
+static AsyncResult aspeed_i2c_bus_send(AspeedI2CBus *bus)
 {
 AspeedI2CClass *aic = ASPEED_I2C_GET_CLASS(bus->controller);
-int ret = -1;
+AsyncResult ret = DONE;
 int i;
 uint32_t reg_cmd = aspeed_i2c_bus_cmd_offset(bus);
 uint32_t reg_pool_ctrl = aspeed_i2c_bus_pool_ctrl_offset(bus);
@@ -239,41 +246,49 @@ static int aspeed_i2c_bus_send(AspeedI2CBus *bus, uint8_t 
pool_start)
 TX_COUNT);
 
 if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_BUFF_EN)) {
-for (i = pool_start; i < pool_tx_count; i++) {
+while (bus->pool_pos < pool_tx_count) {
 uint8_t *pool_base = aic->bus_pool_base(bus);
 
-trace_aspeed_i2c_bus_send("BUF", i + 1, pool_tx_count,
+trace_aspeed_i2c_bus_send("BUF", bus->pool_pos + 1, pool_tx_count,
   pool_base[i]);
-ret = i2c_send(bus->bus, pool_base[i]);
-if (ret) {
+ret = i2c_send_async(bus->bus, pool_base[bus->pool_pos]);
+if (ret == ERROR) {
 break;
 }
+bus->pool_pos++;
+if (ret == YIELD) {
+return YIELD;
+}
 }
 SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, TX_BUFF_EN, 0);
 } else if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_DMA_EN)) {
 /* In new mode, clear how many bytes we TXed */
-if (aspeed_i2c_is_new_mode(bus->controller)) {
+if (aspeed_i2c_is_new_mode(bus->controller) && bus->pool_pos == 0) {
 ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN_STS, TX_LEN, 0);
 }
 while (bus->regs[reg_dma_len]) {
 uint8_t data;
 aspeed_i2c_dma_read(bus, &data);
-trace_aspeed_i2c_bus_send("DMA", bus->regs[reg_dma_len],
+trace_aspeed_i2c_bus_send("DMA", bus->regs[bus->pool_pos],
   bus->regs[reg_dma_len], data);
-ret = i2c_send(bus->bus, data);
-if (ret) {
+ret = i2c_send_async(bus->bus, data);
+if (ret == ERROR) {
 break;
 }
+bus->pool_pos++;
 /* In new mode, keep track of how many bytes we TXed */
 if (aspeed_i2c_is_new_mode(bus->controller)) {
 ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN_STS, TX_LEN,
  ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN_STS,
   TX_LEN) + 1);
 }
+if (ret == YIELD) {
+return YIELD;
+}
 }
 SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, TX_DMA_EN, 0);
 } else {
-trace_aspeed_i2c_bus_send("BYTE", pool_start, 1,
+trace_aspeed_i2c_bus_send("BYTE", 1, 1,

Re: [PATCH v2] target/i386: Restore TSX features with taa-no

2022-07-14 Thread Xiaoyao Li


On 7/14/2022 3:59 PM, Paolo Bonzini wrote:

On 7/14/22 07:36, Zhenzhong Duan wrote:
On ICX-2S2 host, when run L2 guest with both L1/L2 using 
Icelake-Server-v3

or above, we got below warning:

"warning: host doesn't support requested feature: MSR(10AH).taa-no 
[bit 8]"


This is because L1 KVM doesn't expose taa-no to L2 if RTM is disabled,
then starting L2 qemu triggers the warning.

Fix it by restoring TSX features in Icelake-Server-v3, which may also 
help

guest performance if host isn't susceptible to TSX Async Abort (TAA)
vulnerabilities.

Fixes: d965dc35592d ("target/i386: Add ARCH_CAPABILITIES related bits 
into Icelake-Server CPU model")

Tested-by: Xiangfei Ma 
Signed-off-by: Zhenzhong Duan 
---
v2: Rewrite commit message


Why wouldn't the fix be (in an Icelake-Server-v4 model) to remove taa-no?


Production Icelake silicon should have the taa-no set, that's the reason 
taa-no was added in v3 model.


When taa-no is present, it's safe to bring TSX features back.

I'm wondering if we need a new version (v7) for this change.


Paolo


  target/i386/cpu.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 14f681e998cc..25ef972a3eed 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3423,6 +3423,9 @@ static const X86CPUDefinition builtin_x86_defs[] 
= {

  {
  .version = 3,
  .props = (PropValue[]) {
+    /* Restore TSX features removed by -v2 above */
+    { "hle", "on" },
+    { "rtm", "on" },
  { "arch-capabilities", "on" },
  { "rdctl-no", "on" },
  { "ibrs-all", "on" },

RE: [PATCH] hw/virtio/virtio-iommu: Enforce power-of-two notify for both MAP and UNMAP

2022-07-14 Thread Zhang, Tina




> -Original Message-
> From: Jean-Philippe Brucker 
> Sent: Thursday, July 14, 2022 5:54 PM
> To: eric.au...@redhat.com
> Cc: m...@redhat.com; qemu-devel@nongnu.org; Jean-Philippe Brucker
> ; Zhang, Tina 
> Subject: [PATCH] hw/virtio/virtio-iommu: Enforce power-of-two notify for
> both MAP and UNMAP
> 
> Currently we only enforce power-of-two mappings (required by the QEMU
> notifier) for UNMAP requests. A MAP request not aligned on a power-of-two
> may be successfully handled by VFIO, and then the corresponding UNMAP
> notify will fail because it will attempt to split that mapping. Ensure MAP and
> UNMAP notifications are consistent.
> 
> Fixes: dde3f08b5cab ("virtio-iommu: Handle non power of 2 range
> invalidations")
> Reported-by: Tina Zhang 
> Signed-off-by: Jean-Philippe Brucker 
> ---
>  hw/virtio/virtio-iommu.c | 44 +++-
>  1 file changed, 25 insertions(+), 19 deletions(-)
> 
> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index
> 281152d338..f3ecbc71af 100644
> --- a/hw/virtio/virtio-iommu.c
> +++ b/hw/virtio/virtio-iommu.c
> @@ -197,6 +197,29 @@ static gint interval_cmp(gconstpointer a,
> gconstpointer b, gpointer user_data)
>  }
>  }
> 
> +static void virtio_iommu_notify_map_unmap(IOMMUMemoryRegion *mr,
> +  IOMMUTLBEvent *event,
> +  hwaddr virt_start, hwaddr
> +virt_end) {
> +uint64_t delta = virt_end - virt_start;
> +
> +event->entry.iova = virt_start;
> +event->entry.addr_mask = delta;
> +
> +if (delta == UINT64_MAX) {
> +memory_region_notify_iommu(mr, 0, *event);
> +}
> +
> +while (virt_start != virt_end + 1) {
> +uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end,
> + 64);
> +
> +event->entry.addr_mask = mask;
> +event->entry.iova = virt_start;
> +memory_region_notify_iommu(mr, 0, *event);
> +virt_start += mask + 1;

Hi Jean, 

We also need to increase the event->translated_addr for the map request here.

Thanks,
Tina

> +}
> +}
> +
>  static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr
> virt_start,
>  hwaddr virt_end, hwaddr paddr,
>  uint32_t flags) @@ -215,19 +238,16 @@ 
> static void
> virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
> 
>  event.type = IOMMU_NOTIFIER_MAP;
>  event.entry.target_as = &address_space_memory;
> -event.entry.addr_mask = virt_end - virt_start;
> -event.entry.iova = virt_start;
>  event.entry.perm = perm;
>  event.entry.translated_addr = paddr;
> 
> -memory_region_notify_iommu(mr, 0, event);
> +virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
>  }
> 
>  static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr,
> hwaddr virt_start,
>hwaddr virt_end)  {
>  IOMMUTLBEvent event;
> -uint64_t delta = virt_end - virt_start;
> 
>  if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
>  return;
> @@ -239,22 +259,8 @@ static void
> virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
>  event.entry.target_as = &address_space_memory;
>  event.entry.perm = IOMMU_NONE;
>  event.entry.translated_addr = 0;
> -event.entry.addr_mask = delta;
> -event.entry.iova = virt_start;
> -
> -if (delta == UINT64_MAX) {
> -memory_region_notify_iommu(mr, 0, event);
> -}
> 
> -
> -while (virt_start != virt_end + 1) {
> -uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64);
> -
> -event.entry.addr_mask = mask;
> -event.entry.iova = virt_start;
> -memory_region_notify_iommu(mr, 0, event);
> -virt_start += mask + 1;
> -}
> +virtio_iommu_notify_map_unmap(mr, &event, virt_start, virt_end);
>  }
> 
>  static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value,
> --
> 2.36.1

Re: [PATCH 7/7] target/arm: Honour VTCR_EL2 bits in Secure EL2

2022-07-14 Thread Richard Henderson


On 7/14/22 18:53, Peter Maydell wrote:

In regime_tcr() we return the appropriate TCR register for the
translation regime.  For Secure EL2, we return the VSTCR_EL2 value,
but in this translation regime some fields that control behaviour are
in VTCR_EL2.  When this code was originally written (as the comment
notes), QEMU didn't care about any of those fields, but we have since
added support for features such as LPA2 which do need the values from
those fields.

Synthesize a TCR value by merging in the relevant VTCR_EL2 fields to
the VSTCR_EL2 value.

Resolves:https://gitlab.com/qemu-project/qemu/-/issues/1103
Signed-off-by: Peter Maydell
---
  target/arm/cpu.h   | 19 +++
  target/arm/internals.h | 22 +++---
  2 files changed, 38 insertions(+), 3 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH 6/7] target/arm: Store TCR_EL* registers as uint64_t

2022-07-14 Thread Richard Henderson


On 7/14/22 18:53, Peter Maydell wrote:

Change the representation of the TCR_EL* registers in the CPU state
struct from struct TCR to uint64_t.  This allows us to drop the
custom vmsa_ttbcr_raw_write() function, moving the "enforce RES0"
checks to their more usual location in the writefn
vmsa_ttbcr_write().  We also don't need the resetfn any more.

Signed-off-by: Peter Maydell
---
  target/arm/cpu.h  |  8 +
  target/arm/internals.h|  6 ++--
  target/arm/cpu.c  |  2 +-
  target/arm/debug_helper.c |  2 +-
  target/arm/helper.c   | 75 +++
  target/arm/ptw.c  |  2 +-
  6 files changed, 27 insertions(+), 68 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH 5/7] target/arm: Store VTCR_EL2, VSTCR_EL2 registers as uint64_t

2022-07-14 Thread Richard Henderson


On 7/14/22 18:53, Peter Maydell wrote:

Change the representation of the VSTCR_EL2 and VTCR_EL2 registers in
the CPU state struct from struct TCR to uint64_t.

Signed-off-by: Peter Maydell
---
  target/arm/cpu.h   |  4 ++--
  target/arm/internals.h |  4 ++--
  target/arm/helper.c|  4 +---
  target/arm/ptw.c   | 14 +++---
  4 files changed, 12 insertions(+), 14 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH 4/7] target/arm: Fix big-endian host handling of VTCR

2022-07-14 Thread Richard Henderson


On 7/14/22 18:53, Peter Maydell wrote:

We have a bug in our handling of accesses to the AArch32 VTCR
register on big-endian hosts: we were not adjusting the part of the
uint64_t field within TCR that the generated code would access.  That
can be done with offsetoflow32(), by using an ARM_CP_STATE_BOTH cpreg
struct, or by defining a full set of read/write/reset functions --
the various other TCR cpreg structs used one or another of those
strategies, but for VTCR we did not, so on a big-endian host VTCR
accesses would touch the wrong half of the register.

Use offsetoflow32() in the VTCR register struct.  This works even
though the field in the CPU struct is currently a struct TCR, because
the first field in that struct is the uint64_t raw_tcr.

None of the other TCR registers have this bug -- either they are
AArch64 only, or else they define resetfn, writefn, etc, and
expect to be passed the full struct pointer.

Signed-off-by: Peter Maydell
---


Reviewed-by: Richard Henderson 

r~

Re: [PATCH 3/7] target/arm: Fold regime_tcr() and regime_tcr_value() together

2022-07-14 Thread Richard Henderson


On 7/14/22 18:52, Peter Maydell wrote:

The only caller of regime_tcr() is now regime_tcr_value(); fold the
two together, and use the shorter and more natural 'regime_tcr'
name for the new function.

Signed-off-by: Peter Maydell
---
  target/arm/internals.h  | 16 +---
  target/arm/helper.c |  6 +++---
  target/arm/ptw.c|  6 +++---
  target/arm/tlb_helper.c |  2 +-
  4 files changed, 12 insertions(+), 18 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH 2/7] target/arm: Calculate mask/base_mask in get_level1_table_address()

2022-07-14 Thread Richard Henderson


On 7/14/22 18:52, Peter Maydell wrote:

In get_level1_table_address(), instead of using precalculated values
of mask and base_mask from the TCR struct, calculate them directly
(in the same way we currently do in vmsa_ttbcr_raw_write() to
populate the TCR struct fields).

Signed-off-by: Peter Maydell 


Reviewed-by: Richard Henderson 


r~

Re: [PATCH 1/7] target/arm: Define and use new regime_tcr_value() function

2022-07-14 Thread Richard Henderson


On 7/14/22 18:52, Peter Maydell wrote:

The regime_tcr() function returns a pointer to a struct TCR
corresponding to the TCR controlling a translation regime.  The
struct TCR has the raw value of the register, plus two fields mask
and base_mask which are used as a small optimization in the case of
32-bit short-descriptor lookups.  Almost all callers of regime_tcr()
only want the raw register value.  Define and use a new
regime_tcr_value() function which returns only the raw 64-bit
register value.

This is a preliminary to removing the 32-bit short descriptor
optimization -- it only saves a handful of bit operations, which is
tiny compared to the overhead of doing a page table walk at all, and
the TCR struct is awkward and makes fixing
https://gitlab.com/qemu-project/qemu/-/issues/1103  unnecessarily
difficult.

Signed-off-by: Peter Maydell
---
  target/arm/internals.h  | 6 ++
  target/arm/helper.c | 6 +++---
  target/arm/ptw.c| 8 
  target/arm/tlb_helper.c | 2 +-
  4 files changed, 14 insertions(+), 8 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH] MAINTAINERS: Add myself as Guest Agent co-maintainer

2022-07-14 Thread Michael Roth

On Wed, Jul 13, 2022 at 04:32:49PM +0300, Konstantin Kostiuk wrote:
> Signed-off-by: Konstantin Kostiuk 

Acked-by: Michael Roth 

> ---
>  MAINTAINERS | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 450abd0252..22a4ffe0a2 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -2880,6 +2880,7 @@ T: git https://repo.or.cz/qemu/armbru.git qapi-next
>  
>  QEMU Guest Agent
>  M: Michael Roth 
> +M: Konstantin Kostiuk 
>  S: Maintained
>  F: qga/
>  F: docs/interop/qemu-ga.rst
> -- 
> 2.25.1
>

[python-qemu-qmp MR #8] Add git-based package versions, publishing scripts, and dev package builds

2022-07-14 Thread GitLab Bot

Author: John Snow - https://gitlab.com/jsnow
Merge Request: 
https://gitlab.com/qemu-project/python-qemu-qmp/-/merge_requests/8
... from: jsnow/python-qemu-qmp:packaging
... into: qemu-project/python-qemu-qmp:main

This is all about streamlining the process of tagging, building, and 
publishing. The script authored here, `publish.py`, is designed to make the 
publishing process fool-resistant by providing a slew of smoke tests designed 
to prevent erroneous, premature, or inconsistent releases.

A note:
- Before this is merged, I will want to tag 
765e2e210dcbf975f93d1b142761651e61772da6 as "v0.0.0a1" on origin/main so that 
the dev package build that will be incurred after the merge is accepted will 
have an appropriate version (v0.0.0a2.devNN+09abcdef). I didn't do it yet so 
that there can be feedback on this scheme first, in case. I do not want to ever 
delete a tag from `origin/main` in keeping with the principle that git commit 
history should never change.

The intended release process is expected to be something like this:

1. An MR is submitted that updates the README with new changelog info, any last 
touchups, etc. The MR makes it clear to reviewer(s) that a new version will be 
published contingent on review, successful pipelines, etc.
2. MR is approved and merged. Pipelines run and pass.
3. A maintainer (me) runs `python3 publish.py tag` from my local repo and 
assigns a new version number. The annotated tag is pushed to origin.
4. Maintainer runs `python3 publish.py build` to create new distribution files 
on their local machine.
5. Maintainer runs `python3 publish.py publish --test` as a dry run to push a 
new package version to `test.pypi.org`. Maintainer inspects that it appears to 
have worked correctly (readme looks right, metadata appears to render 
correctly, etc) and all pieces appear to be in place.
6. Maintainer runs `python3 publish.py publish` to finalize the submission to 
PyPI.

The authentication for the publish script is provided by the environment 
variable `TWINE_PASSWORD`, which takes the form of a PyPI authorization token. 
It would also be possible to utilize keyring support, but I didn't leap that 
far ahead yet.

Each version tag is designed to be signed and annotated. Each distribution file 
uploaded to PyPI is also designed to be signed. At present I am just using my 
own personal key, but I could look into creating a generic "QEMU project python 
release" key for the purpose, if requested. (Please suggest key creation 
parameters in this case.)

LASTLY, I intend to - after this series is merged - send a followup MR to 
indicate the v0.0.1 release, and then test the process by tagging and releasing 
v0.0.1. See milestone %"v0.0.1 (First release)" 

See the commit messages on each change attached here for additional 
information, notes, musings, poetry, etc.

Closes #16

---

This is an automated message. This bot will only relay the creation of new merge
requests and will not relay review comments, new revisions, or concluded merges.
Please follow the GitLab link to participate in review.

Re: [PATCH 2/3] hw/sensor: max31785 : add fan status, tach target, and tach input object properties

2022-07-14 Thread Maheswara Kurapati


Hello Peter,

Thank you for the review.  Please see my comments inline.

Thank you,

Mahesh

On 7/14/22 8:10 AM, Peter Maydell wrote:

On Thu, 14 Jul 2022 at 14:04, Maheswara Kurapati
 wrote:

This fix adds object properties for the FAN_COMMAND_1 (3Bh), STATUS_FANS_1_2 
(81h),
READ_FAN_SPEED_1 (90h) registers for the MAX31785 instrumentation. An additional
property tach_margin_percent updates the tachs for a configured percent of
FAN_COMMAND_1 value.

Register                property
----------------------------------------
FAN_COMMAND_1 (3Bh)     fan_target
STATUS_FANS_1_2 (81h)   status_fans_1_2
READ_FAN_SPEED_1 (90h)  fan_input

This commit message is missing the rationale -- why do we need this?
The STATUS_FANS_1_2 and READ_FAN_SPEED_1 registers are read-only. I 
added these properties so that device fault conditions can be simulated.


I am also not sure that we should be defining properties that are
just straight 1:1 with the device registers. Compare the way we
handle temperature-sensor values, where the property values are
defined in a generic manner (same units representation) regardless
of the underlying device and the device's property-set-get implementation
then handles converting that to and from whatever internal implementation
representation the device happens to use.
I am not sure I understood your comment.  I checked hw/sensors/tmp105.c, 
in which a "temperature" property is added for the tmp_input field in 
almost the same way as I did, except that the registers in the 
MAX31785 are in direct format.


thanks
-- PMM

Re: [PATCH RESEND 00/11] target/ppc: Implement slbiag move slb* to decodetree

2022-07-14 Thread Daniel Henrique Barboza


Queued in gitlab.com/danielhb/qemu/tree/ppc-next. Thanks,


Daniel

On 7/1/22 10:34, Lucas Coutinho wrote:

Resent after rebasing and fixing conflicts with master.
Add Leandro Lupori as reviewer.

Based-on: <20220624191424.190471-1-leandro.lup...@eldorado.org.br>

Implement the following PowerISA v3.0 instruction:
slbiag: SLB Invalidate All Global X-form

Move the following PowerISA v3.0 instructions to decodetree:
slbie: SLB Invalidate Entry X-form
slbieg: SLB Invalidate Entry Global X-form
slbia: SLB Invalidate All X-form
slbmte: SLB Move To Entry X-form
slbmfev: SLB Move From Entry VSID X-form
slbmfee: SLB Move From Entry ESID X-form
slbfee: SLB Find Entry ESID
slbsync: SLB Synchronize

Lucas Coutinho (9):
   target/ppc: Move slbie to decodetree
   target/ppc: Move slbieg to decodetree
   target/ppc: Move slbia to decodetree
   target/ppc: Move slbmte to decodetree
   target/ppc: Move slbmfev to decodetree
   target/ppc: Move slbmfee to decodetree
   target/ppc: Move slbfee to decodetree
   target/ppc: Move slbsync to decodetree
   target/ppc: Implement slbiag

Matheus Ferst (2):
   target/ppc: receive DisasContext explicitly in GEN_PRIV
   target/ppc: add macros to check privilege level

  target/ppc/helper.h  |  15 +-
  target/ppc/insn32.decode |  26 ++
  target/ppc/mmu-hash64.c  |  41 +-
  target/ppc/translate.c   | 417 +++
  target/ppc/translate/fixedpoint-impl.c.inc   |   7 +-
  target/ppc/translate/fp-impl.c.inc   |   4 +-
  target/ppc/translate/storage-ctrl-impl.c.inc | 146 +++
  7 files changed, 377 insertions(+), 279 deletions(-)

Re: [PATCH v3 0/2] target/ppc: Implement ISA 3.00 tlbie[l]

2022-07-14 Thread Daniel Henrique Barboza


Queued in gitlab.com/danielhb/qemu/tree/ppc-next. Thanks,


Daniel

On 7/12/22 16:37, Leandro Lupori wrote:

Changes from v2:
- Moved TLBIE defines from helper.h to mmu-book3s-v3.h

Leandro Lupori (2):
   target/ppc: Move tlbie[l] to decode tree
   target/ppc: Implement ISA 3.00 tlbie[l]

  target/ppc/cpu_init.c|   4 +-
  target/ppc/helper.h  |   2 +
  target/ppc/insn32.decode |   8 +
  target/ppc/mmu-book3s-v3.h   |  15 ++
  target/ppc/mmu_helper.c  | 154 +++
  target/ppc/translate.c   |  64 +---
  target/ppc/translate/storage-ctrl-impl.c.inc | 104 +
  7 files changed, 287 insertions(+), 64 deletions(-)
  create mode 100644 target/ppc/translate/storage-ctrl-impl.c.inc

Re: [PATCH v3] target/ppc: check tb_env != 0 before printing TBU/TBL/DECR

2022-07-14 Thread Daniel Henrique Barboza





On 7/14/22 14:23, Matheus Ferst wrote:

When using "-machine none", env->tb_env is not allocated, causing the
segmentation fault reported in issue #85 (launchpad bug #811683). To
avoid this problem, check if the pointer != NULL before calling the
methods to print TBU/TBL/DECR.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/85
Signed-off-by: Matheus Ferst 
---
v3:
   - Only check env->tb_env in softmmu, linux-user get timebase from
 elsewhere. Also, try to make the qemu_fprintf call more readable.
   - Link to v2: 
https://lists.gnu.org/archive/html/qemu-ppc/2022-07/msg00193.html
---


Reviewed-by: Daniel Henrique Barboza 


And queued.


Daniel


  target/ppc/cpu_init.c | 18 --
  target/ppc/monitor.c  |  9 +
  2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 86ad28466a..313c8bb300 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7476,17 +7476,15 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, int 
flags)
   "%08x iidx %d didx %d\n",
   env->msr, env->spr[SPR_HID0], env->hflags,
   cpu_mmu_index(env, true), cpu_mmu_index(env, false));
-#if !defined(NO_TIMER_DUMP)
-qemu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64
  #if !defined(CONFIG_USER_ONLY)
- " DECR " TARGET_FMT_lu
-#endif
- "\n",
- cpu_ppc_load_tbu(env), cpu_ppc_load_tbl(env)
-#if !defined(CONFIG_USER_ONLY)
- , cpu_ppc_load_decr(env)
-#endif
-);
+if (env->tb_env) {
+qemu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64
+ " DECR " TARGET_FMT_lu "\n", cpu_ppc_load_tbu(env),
+ cpu_ppc_load_tbl(env), cpu_ppc_load_decr(env));
+}
+#else
+qemu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64 "\n", cpu_ppc_load_tbu(env),
+ cpu_ppc_load_tbl(env));
  #endif
  for (i = 0; i < 32; i++) {
  if ((i & (RGPL - 1)) == 0) {
diff --git a/target/ppc/monitor.c b/target/ppc/monitor.c
index 0b805ef6e9..8250b1304e 100644
--- a/target/ppc/monitor.c
+++ b/target/ppc/monitor.c
@@ -55,6 +55,9 @@ static target_long monitor_get_decr(Monitor *mon, const 
struct MonitorDef *md,
  int val)
  {
  CPUArchState *env = mon_get_cpu_env(mon);
+if (!env->tb_env) {
+return 0;
+}
  return cpu_ppc_load_decr(env);
  }
  
@@ -62,6 +65,9 @@ static target_long monitor_get_tbu(Monitor *mon, const struct MonitorDef *md,

 int val)
  {
  CPUArchState *env = mon_get_cpu_env(mon);
+if (!env->tb_env) {
+return 0;
+}
  return cpu_ppc_load_tbu(env);
  }
  
@@ -69,6 +75,9 @@ static target_long monitor_get_tbl(Monitor *mon, const struct MonitorDef *md,

 int val)
  {
  CPUArchState *env = mon_get_cpu_env(mon);
+if (!env->tb_env) {
+return 0;
+}
  return cpu_ppc_load_tbl(env);
  }

Re: [PATCH v2 07/11] acpi/tests/bits: add python test that exercizes QEMU bios tables using biosbits

2022-07-14 Thread Michael S. Tsirkin

On Thu, Jul 14, 2022 at 07:49:36PM +0530, Ani Sinha wrote:
> > so you skip download if it already exists locally. IIUC it is looking
> > in the CWD, which is presumably the directory the QEMU build is
> > performed in ?
> 
> Yes, build/test/pytest/bits-test
> 
> > So if dev cleans their build tree, the cache is lost ?
> 
> Yes.
> 
> >
> > Avocado has a more persistent cache outside the build tree IIUC.
> >
> 
> So that is why I have the environment variable mechanism for passing to
> the test that will point to the location of the archives. The developer
> can download the files there and point to the test. Or I can change the
> script accordingly if we know where we were downloading. We were
> discussing submodules and it was fiercely hated. So mst suggested another
> repo where to keep the binaries. My script can check out that repo
> somewhere outside the build directory and point the test to that location.
> 
> I have kept several options open. We just need to make some decisions.
> 

For now I would probably just have a script to fetch into source tree.

-- 
MST

Re: [PATCH v2 07/11] acpi/tests/bits: add python test that exercizes QEMU bios tables using biosbits

2022-07-14 Thread Michael S. Tsirkin

On Sun, Jul 10, 2022 at 10:30:10PM +0530, Ani Sinha wrote:
> This change adds python based test environment that can be used to run pytest
> from within a virtual environment. A bash script sets up a virtual environment
> and then runs the python based tests from within that environment.
> All dependent python packages are installed in the virtual environment using
> pip python module. QEMU python test modules are also available in the 
> environment
> for spawning the QEMU based VMs.
> 
> It also introduces QEMU acpi/smbios biosbits python test script which is run
> from within the python virtual environment. When the bios bits tests are run,
> bios bits binaries are downloaded from an external repo/location.
> Currently, the test points to an external private github repo where the bits
> archives are checked in.
> 
> Signed-off-by: Ani Sinha 
> ---
>  tests/pytest/acpi-bits/acpi-bits-test-venv.sh |  59 +++
>  tests/pytest/acpi-bits/acpi-bits-test.py  | 382 ++
>  tests/pytest/acpi-bits/meson.build|  33 ++
>  tests/pytest/acpi-bits/requirements.txt   |   1 +
>  4 files changed, 475 insertions(+)
>  create mode 100644 tests/pytest/acpi-bits/acpi-bits-test-venv.sh
>  create mode 100644 tests/pytest/acpi-bits/acpi-bits-test.py
>  create mode 100644 tests/pytest/acpi-bits/meson.build
>  create mode 100644 tests/pytest/acpi-bits/requirements.txt
> 
> diff --git a/tests/pytest/acpi-bits/acpi-bits-test-venv.sh 
> b/tests/pytest/acpi-bits/acpi-bits-test-venv.sh
> new file mode 100644
> index 00..186395473b
> --- /dev/null
> +++ b/tests/pytest/acpi-bits/acpi-bits-test-venv.sh
> @@ -0,0 +1,59 @@
> +#!/usr/bin/env bash
> +# Generates a python virtual environment for the test to run.
> +# Then runs python test scripts from within that virtual environment.
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program.  If not, see .
> +#
> +# Author: Ani Sinha 
> +
> +set -e
> +
> +MYPATH=$(realpath ${BASH_SOURCE:-$0})
> +MYDIR=$(dirname $MYPATH)
> +
> +if [ -z "$PYTEST_SOURCE_ROOT" ]; then
> +echo -n "Please set QTEST_SOURCE_ROOT env pointing"
> +echo " to the root of the qemu source tree."
> +echo -n "This is required so that the test can find the "
> +echo "python modules that it needs for execution."
> +exit 1
> +fi
> +SRCDIR=$PYTEST_SOURCE_ROOT
> +TESTSCRIPTS=("acpi-bits-test.py")
> +PIPCMD="-m pip -q --disable-pip-version-check"
> +# we need to save the old value of PWD before we do a change-dir later
> +PYTEST_PWD=$PWD
> +
> +TESTS_PYTHON=/usr/bin/python3
> +TESTS_VENV_REQ=requirements.txt
> +
> +# sadly for pip -e and -t options do not work together.
> +# please see https://github.com/pypa/pip/issues/562
> +cd $MYDIR
> +
> +$TESTS_PYTHON -m venv .
> +$TESTS_PYTHON $PIPCMD install -e $SRCDIR/python/
> +[ -f $TESTS_VENV_REQ ] && \
> +$TESTS_PYTHON $PIPCMD install -r $TESTS_VENV_REQ || exit 0
> +
> +# venv is activated at this point.
> +
> +# run the test
> +for testscript in ${TESTSCRIPTS[@]} ; do
> +export PYTEST_PWD; python3 $testscript
> +done
> +
> +cd $PYTEST_PWD
> +
> +exit 0
> diff --git a/tests/pytest/acpi-bits/acpi-bits-test.py 
> b/tests/pytest/acpi-bits/acpi-bits-test.py
> new file mode 100644
> index 00..97e61eb709
> --- /dev/null
> +++ b/tests/pytest/acpi-bits/acpi-bits-test.py
> @@ -0,0 +1,382 @@
> +#!/usr/bin/env python3
> +# group: rw quick
> +# Exercize QEMU generated ACPI/SMBIOS tables using biosbits,
> +# https://biosbits.org/
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program.  If not, see .
> +#
> +# Some parts are slightly taken from qtest.py and iotests.py
> +#
> +# Authors:
> +#  Ani Sinha 
> +
> +# pylint: disable=invalid-name
> +
> +"""
> +QEMU bios tests using biosbits available at
> +https://biosbits.org/.
> +"""
> +
>

Re: [PATCH v8 08/12] s390x/cpu_topology: implementing numa for the s390x topology

2022-07-14 Thread Pierre Morel





On 7/14/22 16:57, Janis Schoetterl-Glausch wrote:

On 6/20/22 16:03, Pierre Morel wrote:

S390x CPU Topology allows a non uniform repartition of the CPU
inside the topology containers, sockets, books and drawers.

We use numa to place the CPU inside the right topology container
and report the non uniform topology to the guest.

Note that s390x needs CPU0 to belong to the topology and consequently
all topology must include CPU0.

We accept a partial QEMU numa definition, in that case undefined CPUs
are added to free slots in the topology starting with slot 0 and going
up.


I don't understand why doing it this way, via numa, makes sense for us.
We report the topology to the guest via STSI, which tells the guest
what the topology "tree" looks like. We don't report any numa distances to the 
guest.
The natural way to specify where a cpu is added to the vm, seems to me to be
by specify the socket, book, ... IDs when doing a device_add or via -device on
the command line.

[...]



It is a choice to have the core-id determine where the CPU is situated
in the topology.


But yes, we can choose to use drawer-id, book-id, socket-id and use a
core-id starting at 0 on each socket.


It is not done in the current implementation because the core-id implies 
the socket-id, book-id and drawer-id together with the smp parameters.



--
Pierre Morel
IBM Lab Boeblingen

Re: [PATCH v8 00/12] s390x: CPU Topology

2022-07-14 Thread Pierre Morel





On 7/14/22 20:43, Janis Schoetterl-Glausch wrote:

On 6/20/22 16:03, Pierre Morel wrote:

Hi,

This new spin is essentially for coherence with the last Linux CPU
Topology patch, function testing and coding style modifications.

Foreword
===

The goal of this series is to implement CPU topology for S390, it
improves the preceding series with the implementation of books and
drawers, of non uniform CPU topology and with documentation.

To use these patches, you will need the Linux series version 10.
You find it there:
https://lkml.org/lkml/2022/6/20/590

Currently this code is for KVM only, I have no idea if it is interesting
to provide a TCG patch. If ever it will be done in another series.

To have a better understanding of the S390x CPU Topology and its
implementation in QEMU you can have a look at the documentation in the
last patch or follow the introduction here under.

A short introduction


CPU Topology is described in the S390 POP with essentially the description
of two instructions:

PTF Perform Topology function used to poll for topology change
 and used to set the polarization but this part is not part of this item.

STSI Store System Information and the SYSIB 15.1.x providing the Topology
 configuration.

S390 Topology is a 6 levels hierarchical topology with up to 5 level
 of containers. The last topology level, specifying the CPU cores.

 This patch series only uses the two lower levels sockets and cores.
 
 To get the information on the topology, S390 provides the STSI

 instruction, which stores a structures providing the list of the
 containers used in the Machine topology: the SYSIB.
 A selector within the STSI instruction allows choosing how many topology
 levels will be provided in the SYSIB.

 Using the Topology List Entries (TLE) provided inside the SYSIB,
 the Linux kernel is able to compute the information about the cache
 distance between two cores and can use this information to take
 scheduling decisions.


Do the socket, book, ... metaphors and looking at STSI from the existing
smp infrastructure even make sense?


Sorry, I do not understand.
I admit the cover-letter is old and I did not rewrite it really good 
since the first patch series.


What we do is:
Compute the STSI from the SMP + numa + device QEMU parameters .



STSI 15.1.x reports the topology to the guest and for a virtual machine,
this topology can be very dynamic. So a CPU can move from one topology
container to another, but the socket of a cpu changing while it's running seems
a bit strange. And this isn't supported by this patch series as far as I 
understand,
the only topology changes are on hotplug.


A CPU changing from one socket to another is the only case, besides a
new CPU being plugged in, in which the PTF instruction reports a change
in the topology.

It is not expected to appear often but it does appear.
The code has been removed from the kernel in spin 10 for 2 reasons:
1) we decided to first support only dedicated and pinned CPU
2) Christian fears it may happen too often due to Linux host scheduling 
and could be a performance problem


So yes now we only have a topology report on vCPU plug.











--
Pierre Morel
IBM Lab Boeblingen

[PATCH] decodetree.rst: add hint about format reuse with '.'

2022-07-14 Thread Daniel Henrique Barboza

This observation came up during the review of "Move tlbie[l] to decode
tree" patch [1] and it seems useful enough to be added in the docs.

[1] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg02195.html

Signed-off-by: Daniel Henrique Barboza 
---
 docs/devel/decodetree.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst
index 49ea50c2a7..6bcb94f449 100644
--- a/docs/devel/decodetree.rst
+++ b/docs/devel/decodetree.rst
@@ -110,7 +110,9 @@ A *fixedbit_elt* describes a contiguous sequence of bits 
that must
 be 1, 0, or don't care.  The difference between '.' and '-'
 is that '.' means that the bit will be covered with a field or a
 final 0 or 1 from the pattern, and '-' means that the bit is really
-ignored by the cpu and will not be specified.
+ignored by the cpu and will not be specified.  The same format can
+be reused in multiple patterns by using '.' and let the pattern
+decide whether the given bit will be ignored or not.
 
 A *field_elt* describes a simple field only given a width; the position of
 the field is implied by its position with respect to other *fixedbit_elt*
-- 
2.36.1

Re: [PATCH v3 1/2] target/ppc: Move tlbie[l] to decode tree

2022-07-14 Thread Daniel Henrique Barboza





On 7/14/22 16:31, Leandro Lupori wrote:

On 7/14/22 15:45, Daniel Henrique Barboza wrote:

On 7/12/22 16:37, Leandro Lupori wrote:

Also decode RIC, PRS and R operands.

Signed-off-by: Leandro Lupori 
---
  target/ppc/cpu_init.c    |  4 +-
  target/ppc/insn32.decode |  8 ++
  target/ppc/translate.c   | 64 +-
  target/ppc/translate/storage-ctrl-impl.c.inc | 87 
  4 files changed, 99 insertions(+), 64 deletions(-)
  create mode 100644 target/ppc/translate/storage-ctrl-impl.c.inc

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index c16cb8dbe7..8d7e77f778 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6368,7 +6368,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
 PPC_FLOAT_EXT |
 PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
 PPC_MEM_SYNC | PPC_MEM_EIEIO |
-   PPC_MEM_TLBSYNC |
+   PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
 PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |
 PPC_SEGMENT_64B | PPC_SLBI |
 PPC_POPCNTB | PPC_POPCNTWD |
@@ -6585,7 +6585,7 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data)
 PPC_FLOAT_EXT |
 PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
 PPC_MEM_SYNC | PPC_MEM_EIEIO |
-   PPC_MEM_TLBSYNC |
+   PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
 PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |
 PPC_SEGMENT_64B | PPC_SLBI |
 PPC_POPCNTB | PPC_POPCNTWD |
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 6ea48d5163..2b985249b8 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -809,3 +809,11 @@ VMODSD  000100 . . . 1001011    @VX
  VMODUD  000100 . . . 11011001011    @VX
  VMODSQ  000100 . . . 1111011    @VX
  VMODUQ  000100 . . . 1101011    @VX
+
+## TLB Management Instructions
+
+_tlbie    rb rs ric prs:bool r:bool
+@X_tlbie    .. rs:5 - ric:2 prs:1 r:1 rb:5 .. . _tlbie


You're marking bit 11 as ignored but you're not marking 31 as ignored. The way
the argument patterns are made in this file seems to be either not mark the
ignored bits (e.g. most of args from the start of the file) or mark all ignore
bits (e.g. @XL_S from RFEBB).

I am being petty, yes. This makes no functional change in the instruction, but
I'd rather mark bit 31 as ignored in @X_tlbie as well.

I did that in my tree and it seems to work fine. If you're ok with this change,



Reviewed-by: Daniel Henrique Barboza 



Right, the @X_tlbie pattern ended up inconsistent with regard to ignored bits. 
I'm ok with changing bit 31 of it to ignored.

Talking with the guys here, they've explained to me that it is usually better to 
use '.' with format definitions ('@'), to make it easier to reuse them for more 
instructions, some of which may ignore a given bit while others may not. But 
for @X_tlbie it's ok to use dots or dashes for bits 11 and 31, as it's used 
only by TLBIE and TLBIEL.



Makes sense. Thanks for the explanation. I'll keep that in mind.


Daniel




Thanks,
Leandro





+
+TLBIE   01 . - .. . . . 0100110010 - @X_tlbie
+TLBIEL  01 . - .. . . . 0100010010 - @X_tlbie

Re: [PATCH v3 1/2] target/ppc: Move tlbie[l] to decode tree

2022-07-14 Thread Leandro Lupori


On 7/14/22 15:45, Daniel Henrique Barboza wrote:

On 7/12/22 16:37, Leandro Lupori wrote:

Also decode RIC, PRS and R operands.

Signed-off-by: Leandro Lupori 
---
  target/ppc/cpu_init.c    |  4 +-
  target/ppc/insn32.decode |  8 ++
  target/ppc/translate.c   | 64 +-
  target/ppc/translate/storage-ctrl-impl.c.inc | 87 
  4 files changed, 99 insertions(+), 64 deletions(-)
  create mode 100644 target/ppc/translate/storage-ctrl-impl.c.inc

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index c16cb8dbe7..8d7e77f778 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6368,7 +6368,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
 PPC_FLOAT_EXT |
 PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
 PPC_MEM_SYNC | PPC_MEM_EIEIO |
-   PPC_MEM_TLBSYNC |
+   PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
 PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |
 PPC_SEGMENT_64B | PPC_SLBI |
 PPC_POPCNTB | PPC_POPCNTWD |
@@ -6585,7 +6585,7 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void 
*data)

 PPC_FLOAT_EXT |
 PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
 PPC_MEM_SYNC | PPC_MEM_EIEIO |
-   PPC_MEM_TLBSYNC |
+   PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
 PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |
 PPC_SEGMENT_64B | PPC_SLBI |
 PPC_POPCNTB | PPC_POPCNTWD |
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 6ea48d5163..2b985249b8 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -809,3 +809,11 @@ VMODSD  000100 . . . 
1001011    @VX

  VMODUD  000100 . . . 11011001011    @VX
  VMODSQ  000100 . . . 1111011    @VX
  VMODUQ  000100 . . . 1101011    @VX
+
+## TLB Management Instructions
+
+_tlbie    rb rs ric prs:bool r:bool
+@X_tlbie    .. rs:5 - ric:2 prs:1 r:1 rb:5 .. . 
_tlbie


You're marking bit 11 as ignored but you're not marking 31 as ignored. 
The way

the argument patterns are made in this file seems to be either not mark the
ignored bits (e.g. most of args from the start of the file) or mark all 
ignore

bits (e.g. @XL_S from RFEBB).

I am being petty, yes. This makes no functional change in the 
instruction, but

I'd rather mark bit 31 as ignored in @X_tlbie as well.

I did that in my tree and it seems to work fine. If you're ok with this 
change,




Reviewed-by: Daniel Henrique Barboza 



Right, the @X_tlbie pattern ended up inconsistent with regard to ignored 
bits. I'm ok with changing bit 31 of it to ignored.


Talking with the guys here, they've explained to me that it is usually 
better to use '.' with format definitions ('@'), to make it easier to 
reuse them for more instructions, some of which may ignore a given bit 
while others may not. But for @X_tlbie it's ok to use dots or dashes for 
bits 11 and 31, as it's used only by TLBIE and TLBIEL.


Thanks,
Leandro





+
+TLBIE   01 . - .. . . . 0100110010 -
@X_tlbie
+TLBIEL  01 . - .. . . . 0100010010 -
@X_tlbie

Re: [PATCH v8 02/12] s390x/cpu_topology: CPU topology objects and structures

2022-07-14 Thread Pierre Morel





On 7/14/22 14:50, Janis Schoetterl-Glausch wrote:

On 7/14/22 13:25, Pierre Morel wrote:

[...]



That is sure.
I thought about putting a fatal error report during the initialization in the 
s390_topology_setup()


And you can set thread > 1 today, so we'd need to handle that. (increase the 
number of cpus instead and print a warning?)

[...]


this would introduce arch dependencies in the hw/core/
I think that the error report for Z is enough.

So once we support Multithreading in the guest we can adjust it easier without 
involving the common code.

Or we can introduce a thread_supported in SMPCompatProps, which would be good.
I would prefer to propose this outside of the series and suppress the fatal 
error once it is adopted.



Yeah, could be a separate series, but then the question remains what you do in 
this one, that is
if you change the code so it would be correct if multithreading were supported.


I would like to first not support multi-thread and raise a fatal error if 
threads are defined or implicitly defined as different from 1.


I prefer to keep multithreading for later, I did not have a look at all 
the implications for the moment.





+
+/*
+ * Setting the first topology: 1 book, 1 socket
+ * This is enough for 64 cores if the topology is flat (single socket)
+ */
+void s390_topology_setup(MachineState *ms)
+{
+    DeviceState *dev;
+
+    /* Create BOOK bridge device */
+    dev = qdev_new(TYPE_S390_TOPOLOGY_BOOK);
+    object_property_add_child(qdev_get_machine(),
+  TYPE_S390_TOPOLOGY_BOOK, OBJECT(dev));


Why add it to the machine instead of directly using a static?


For my opinion it is a characteristic of the machine.


So it's visible to the user via info qtree or something?


It is already visible to the user on info qtree.


Would that even be the appropriate location to show that?


That is a very good question and I really appreciate if we discuss on the 
design before diving into details.

The idea is to have the architecture details being on qtree as object so we can 
plug new drawers/books/socket/cores and in the future when the infrastructure 
allows it unplug them.


Would it not be more accurate to say that we plug in new cpus only?
Since you need to specify the topology up front with -smp and it cannot change 
after.


smp specify the maximum we can have.
I thought we can add dynamically elements inside this maximum set.


So that all is static, books/sockets might be completely unpopulated, but they 
still exist in a way.
As far as I understand, STSI only allows for cpus to change, nothing above it.


I thought we want to plug new books or drawers but I may be wrong.


So you want to be able to plug in, for example, a socket without any cpus in it?
I'm not seeing anything in the description of STSI that forbids having empty 
containers
or containers with a cpu entry without any cpus. But I don't know why that 
would be useful.
And if you don't want empty containers, then the container will just show up 
when plugging in the cpu.


You already convinced me, it is nonsense and, anyway, building every 
container when a cpu is added is how it works with the current 
implementation.



--
Pierre Morel
IBM Lab Boeblingen

[PATCH v7 10/10] i386/pc: restrict AMD only enforcing of 1Tb hole to new machine type

2022-07-14 Thread Joao Martins

The added enforcing is only relevant in the case of AMD where the
range right before the 1TB is restricted and cannot be DMA mapped
by the kernel consequently leading to IOMMU INVALID_DEVICE_REQUEST
or possibly other kinds of IOMMU events in the AMD IOMMU.

Although, there's a case where it may make sense to disable the
IOVA relocation/validation when migrating from a
non-amd-1tb-aware qemu to one that supports it.

Relocating RAM regions to after the 1Tb hole has consequences for
guest ABI because we are changing the memory mapping, so make
sure that only new machine enforce but not older ones.

Signed-off-by: Joao Martins 
Acked-by: Dr. David Alan Gilbert 
Acked-by: Igor Mammedov 
---
 hw/i386/pc.c | 6 --
 hw/i386/pc_piix.c| 2 ++
 hw/i386/pc_q35.c | 2 ++
 include/hw/i386/pc.h | 1 +
 4 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 17613974163e..d13a77025054 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -952,9 +952,10 @@ void pc_memory_init(PCMachineState *pcms,
 /*
  * The HyperTransport range close to the 1T boundary is unique to AMD
  * hosts with IOMMUs enabled. Restrict the ram-above-4g relocation
- * to above 1T to AMD vCPUs only.
+ * to above 1T to AMD vCPUs only. @enforce_amd_1tb_hole is only false in
+ * older machine types (<= 7.0) for compatibility purposes.
  */
-if (IS_AMD_CPU(>env)) {
+if (IS_AMD_CPU(>env) && pcmc->enforce_amd_1tb_hole) {
 /* Bail out if max possible address does not cross HT range */
 if (pc_max_used_gpa(pcms, pci_hole64_size) >= AMD_HT_START) {
 x86ms->above_4g_mem_start = AMD_ABOVE_1TB_START;
@@ -1904,6 +1905,7 @@ static void pc_machine_class_init(ObjectClass *oc, void 
*data)
 pcmc->has_reserved_memory = true;
 pcmc->kvmclock_enabled = true;
 pcmc->enforce_aligned_dimm = true;
+pcmc->enforce_amd_1tb_hole = true;
 /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
  * to be used at the moment, 32K should be enough for a while.  */
 pcmc->acpi_data_size = 0x2 + 0x8000;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 2a483e8666b4..074571bc03a8 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -446,9 +446,11 @@ DEFINE_I440FX_MACHINE(v7_1, "pc-i440fx-7.1", NULL,
 
 static void pc_i440fx_7_0_machine_options(MachineClass *m)
 {
+PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
 pc_i440fx_7_1_machine_options(m);
 m->alias = NULL;
 m->is_default = false;
+pcmc->enforce_amd_1tb_hole = false;
 compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
 compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
 }
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 99ed75371c67..f3aa4694a299 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -383,8 +383,10 @@ DEFINE_Q35_MACHINE(v7_1, "pc-q35-7.1", NULL,
 
 static void pc_q35_7_0_machine_options(MachineClass *m)
 {
+PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
 pc_q35_7_1_machine_options(m);
 m->alias = NULL;
+pcmc->enforce_amd_1tb_hole = false;
 compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
 compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
 }
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 568c226d3034..9cc3f5d33805 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -118,6 +118,7 @@ struct PCMachineClass {
 bool has_reserved_memory;
 bool enforce_aligned_dimm;
 bool broken_reserved_end;
+bool enforce_amd_1tb_hole;
 
 /* generate legacy CPU hotplug AML */
 bool legacy_cpu_hotplug;
-- 
2.17.2

[PATCH v7 06/10] i386/pc: factor out cxl range start to helper

2022-07-14 Thread Joao Martins

Factor out the calculation of the base address of the memory region.
It will be used later on for the cxl range end counterpart calculation
and as well in pc_memory_init() CXL memory region initialization, thus
avoiding duplication.

Cc: Jonathan Cameron 
Signed-off-by: Joao Martins 
---
 hw/i386/pc.c | 26 +-
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 1f42f194d7b7..3fdcab4bb4f3 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -825,6 +825,22 @@ static hwaddr pc_above_4g_end(PCMachineState *pcms)
 return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
 }
 
+static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
+{
+PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
+MachineState *machine = MACHINE(pcms);
+hwaddr cxl_base;
+
+if (pcmc->has_reserved_memory && machine->device_memory->base) {
+cxl_base = machine->device_memory->base
++ memory_region_size(>device_memory->mr);
+} else {
+cxl_base = pc_above_4g_end(pcms);
+}
+
+return cxl_base;
+}
+
 static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
 {
 uint64_t start = 0;
@@ -946,15 +962,7 @@ void pc_memory_init(PCMachineState *pcms,
 MemoryRegion *mr = >cxl_devices_state.host_mr;
 hwaddr cxl_size = MiB;
 
-if (pcmc->has_reserved_memory && machine->device_memory->base) {
-cxl_base = machine->device_memory->base
-+ memory_region_size(>device_memory->mr);
-} else if (pcms->sgx_epc.size != 0) {
-cxl_base = sgx_epc_above_4g_end(>sgx_epc);
-} else {
-cxl_base = pc_above_4g_end(pcms);
-}
-
+cxl_base = pc_get_cxl_range_start(pcms);
 e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
 memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size);
 memory_region_add_subregion(system_memory, cxl_base, mr);
-- 
2.17.2

Re: [PATCH v3 2/2] target/ppc: Implement ISA 3.00 tlbie[l]

2022-07-14 Thread Daniel Henrique Barboza





On 7/12/22 16:37, Leandro Lupori wrote:

This initial version supports the invalidation of one or all
TLB entries. Flush by PID/LPID, or based in process/partition
scope is not supported, because it would make using the
generic QEMU TLB implementation hard. In these cases, all
entries are flushed.

Signed-off-by: Leandro Lupori 
---


Reviewed-by: Daniel Henrique Barboza 



  target/ppc/helper.h  |   2 +
  target/ppc/mmu-book3s-v3.h   |  15 ++
  target/ppc/mmu_helper.c  | 154 +++
  target/ppc/translate/storage-ctrl-impl.c.inc |  17 ++
  4 files changed, 188 insertions(+)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index d627cfe6ed..90d16f00e7 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -672,6 +672,8 @@ DEF_HELPER_FLAGS_1(tlbia, TCG_CALL_NO_RWG, void, env)
  DEF_HELPER_FLAGS_2(tlbie, TCG_CALL_NO_RWG, void, env, tl)
  DEF_HELPER_FLAGS_2(tlbiva, TCG_CALL_NO_RWG, void, env, tl)
  #if defined(TARGET_PPC64)
+DEF_HELPER_FLAGS_4(tlbie_isa300, TCG_CALL_NO_WG, void, \
+env, tl, tl, i32)
  DEF_HELPER_FLAGS_3(store_slb, TCG_CALL_NO_RWG, void, env, tl, tl)
  DEF_HELPER_2(load_slb_esid, tl, env, tl)
  DEF_HELPER_2(load_slb_vsid, tl, env, tl)
diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
index d6d5ed8f8e..674377a19e 100644
--- a/target/ppc/mmu-book3s-v3.h
+++ b/target/ppc/mmu-book3s-v3.h
@@ -50,6 +50,21 @@ struct prtb_entry {
  
  #ifdef TARGET_PPC64
  
+/*

+ * tlbie[l] helper flags
+ *
+ * RIC, PRS, R and local are passed as flags in the last argument.
+ */
+#define TLBIE_F_RIC_SHIFT   0
+#define TLBIE_F_PRS_SHIFT   2
+#define TLBIE_F_R_SHIFT 3
+#define TLBIE_F_LOCAL_SHIFT 4
+
+#define TLBIE_F_RIC_MASK(3 << TLBIE_F_RIC_SHIFT)
+#define TLBIE_F_PRS (1 << TLBIE_F_PRS_SHIFT)
+#define TLBIE_F_R   (1 << TLBIE_F_R_SHIFT)
+#define TLBIE_F_LOCAL   (1 << TLBIE_F_LOCAL_SHIFT)
+
  static inline bool ppc64_use_proc_tbl(PowerPCCPU *cpu)
  {
  return !!(cpu->env.spr[SPR_LPCR] & LPCR_UPRT);
diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c
index 15239dc95b..b881aee23f 100644
--- a/target/ppc/mmu_helper.c
+++ b/target/ppc/mmu_helper.c
@@ -429,6 +429,160 @@ void helper_tlbie(CPUPPCState *env, target_ulong addr)
  ppc_tlb_invalidate_one(env, addr);
  }
  
+#if defined(TARGET_PPC64)

+
+/* Invalidation Selector */
+#define TLBIE_IS_VA 0
+#define TLBIE_IS_PID1
+#define TLBIE_IS_LPID   2
+#define TLBIE_IS_ALL3
+
+/* Radix Invalidation Control */
+#define TLBIE_RIC_TLB   0
+#define TLBIE_RIC_PWC   1
+#define TLBIE_RIC_ALL   2
+#define TLBIE_RIC_GRP   3
+
+/* Radix Actual Page sizes */
+#define TLBIE_R_AP_4K   0
+#define TLBIE_R_AP_64K  5
+#define TLBIE_R_AP_2M   1
+#define TLBIE_R_AP_1G   2
+
+/* RB field masks */
+#define TLBIE_RB_EPN_MASK   PPC_BITMASK(0, 51)
+#define TLBIE_RB_IS_MASKPPC_BITMASK(52, 53)
+#define TLBIE_RB_AP_MASKPPC_BITMASK(56, 58)
+
+void helper_tlbie_isa300(CPUPPCState *env, target_ulong rb, target_ulong rs,
+ uint32_t flags)
+{
+unsigned ric = (flags & TLBIE_F_RIC_MASK) >> TLBIE_F_RIC_SHIFT;
+/*
+ * With the exception of the checks for invalid instruction forms,
+ * PRS is currently ignored, because we don't know if a given TLB entry
+ * is process or partition scoped.
+ */
+bool prs = flags & TLBIE_F_PRS;
+bool r = flags & TLBIE_F_R;
+bool local = flags & TLBIE_F_LOCAL;
+bool effR;
+unsigned is = extract64(rb, PPC_BIT_NR(53), 2), set;
+unsigned ap;/* actual page size */
+target_ulong addr, pgoffs_mask;
+
+qemu_log_mask(CPU_LOG_MMU,
+"%s: local=%d addr=" TARGET_FMT_lx " ric=%u prs=%d r=%d is=%u\n",
+__func__, local, rb & TARGET_PAGE_MASK, ric, prs, r, is);
+
+effR = FIELD_EX64(env->msr, MSR, HV) ? r : env->spr[SPR_LPCR] & LPCR_HR;
+
+/* Partial TLB invalidation is supported for Radix only for now. */
+if (!effR) {
+goto inval_all;
+}
+
+/* Check for invalid instruction forms (effR=1). */
+if (unlikely(ric == TLBIE_RIC_GRP ||
+ ((ric == TLBIE_RIC_PWC || ric == TLBIE_RIC_ALL) &&
+   is == TLBIE_IS_VA) ||
+ (!prs && is == TLBIE_IS_PID))) {
+qemu_log_mask(LOG_GUEST_ERROR,
+"%s: invalid instruction form: ric=%u prs=%d r=%d is=%u\n",
+__func__, ric, prs, r, is);
+goto invalid;
+}
+
+/* We don't cache Page Walks. */
+if (ric == TLBIE_RIC_PWC) {
+if (local) {
+set = extract64(rb, PPC_BIT_NR(51), 12);
+if (set != 0) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid set: %d\n",
+  __func__, set);
+goto invalid;
+}
+}
+return;
+}
+
+/*
+ * Invalidation by LPID or

Re: [PATCH v8 00/12] s390x: CPU Topology

2022-07-14 Thread Janis Schoetterl-Glausch

On 6/20/22 16:03, Pierre Morel wrote:
> Hi,
> 
> This new spin is essentially for coherence with the last Linux CPU
> Topology patch, function testing and coding style modifications.
> 
> Foreword
> ===
> 
> The goal of this series is to implement CPU topology for S390, it
> improves the preceding series with the implementation of books and
> drawers, of non uniform CPU topology and with documentation.
> 
> To use these patches, you will need the Linux series version 10.
> You find it there:
> https://lkml.org/lkml/2022/6/20/590
> 
> Currently this code is for KVM only, I have no idea if it is interesting
> to provide a TCG patch. If ever it will be done in another series.
> 
> To have a better understanding of the S390x CPU Topology and its
> implementation in QEMU you can have a look at the documentation in the
> last patch or follow the introduction here under.
> 
> A short introduction
> 
> 
> CPU Topology is described in the S390 POP with essentially the description
> of two instructions:
> 
> PTF Perform Topology function used to poll for topology change
> and used to set the polarization but this part is not part of this item.
> 
> STSI Store System Information and the SYSIB 15.1.x providing the Topology
> configuration.
> 
> S390 Topology is a 6 levels hierarchical topology with up to 5 level
> of containers. The last topology level, specifying the CPU cores.
> 
> This patch series only uses the two lower levels sockets and cores.
> 
> To get the information on the topology, S390 provides the STSI
> instruction, which stores a structures providing the list of the
> containers used in the Machine topology: the SYSIB.
> A selector within the STSI instruction allows choosing how many topology
> levels will be provided in the SYSIB.
> 
> Using the Topology List Entries (TLE) provided inside the SYSIB,
> the Linux kernel is able to compute the information about the cache
> distance between two cores and can use this information to take
> scheduling decisions.

Do the socket, book, ... metaphors and looking at STSI from the existing
smp infrastructure even make sense?

STSI 15.1.x reports the topology to the guest and for a virtual machine,
this topology can be very dynamic. So a CPU can move from one topology
container to another, but the socket of a cpu changing while it's running seems
a bit strange. And this isn't supported by this patch series as far as I 
understand,
the only topology changes are on hotplug.

[PATCH v7 05/10] i386/pc: factor out cxl range end to helper

2022-07-14 Thread Joao Martins

Move calculation of CXL memory region end to separate helper.

This is in preparation to a future change that removes CXL range
dependency on the CXL memory region, with the goal of allowing
pc_pci_hole64_start() to be called before any memory region are
initialized.

Cc: Jonathan Cameron 
Signed-off-by: Joao Martins 
---
 hw/i386/pc.c | 31 +--
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 216e38da938e..1f42f194d7b7 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -825,6 +825,25 @@ static hwaddr pc_above_4g_end(PCMachineState *pcms)
 return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
 }
 
+static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
+{
+uint64_t start = 0;
+
+if (pcms->cxl_devices_state.host_mr.addr) {
+start = pcms->cxl_devices_state.host_mr.addr +
+memory_region_size(>cxl_devices_state.host_mr);
+if (pcms->cxl_devices_state.fixed_windows) {
+GList *it;
+for (it = pcms->cxl_devices_state.fixed_windows; it; it = 
it->next) {
+CXLFixedWindow *fw = it->data;
+start = fw->mr.addr + memory_region_size(>mr);
+}
+}
+}
+
+return start;
+}
+
 void pc_memory_init(PCMachineState *pcms,
 MemoryRegion *system_memory,
 MemoryRegion *rom_memory,
@@ -1022,16 +1041,8 @@ uint64_t pc_pci_hole64_start(void)
 MachineState *ms = MACHINE(pcms);
 uint64_t hole64_start = 0;
 
-if (pcms->cxl_devices_state.host_mr.addr) {
-hole64_start = pcms->cxl_devices_state.host_mr.addr +
-memory_region_size(>cxl_devices_state.host_mr);
-if (pcms->cxl_devices_state.fixed_windows) {
-GList *it;
-for (it = pcms->cxl_devices_state.fixed_windows; it; it = 
it->next) {
-CXLFixedWindow *fw = it->data;
-hole64_start = fw->mr.addr + memory_region_size(>mr);
-}
-}
+if (pcms->cxl_devices_state.is_enabled) {
+hole64_start = pc_get_cxl_range_end(pcms);
 } else if (pcmc->has_reserved_memory && ms->device_memory->base) {
 hole64_start = ms->device_memory->base;
 if (!pcmc->broken_reserved_end) {
-- 
2.17.2

[PATCH v7 00/10] i386/pc: Fix creation of >= 1010G guests on AMD systems with IOMMU

2022-07-14 Thread Joao Martins

v6[7] -> v7:

* Rebased to latest staging
* Build on top of apply CXL cleanups (Igor Mammedov)
* Use qdev property rather than introducing new accessors to the i440fx pci-host
(Bernhard Beschow)
* Add Igor's Rb to patch 4 (Igor Mammedov)
* Replace pci_hole64_start() related helper functions rather than coexisting 
with MR variant
code in patches 4-8. This removes unneeded code that no longer needs to be tied 
to MR (Igor Mammedov)
* Replace MR with memory region in the whole series (Igor Mammedov)
* Remove pc_set_amd_above_4g_mem_start() and move maxusedaddr check into being 
generic (Igor Mammedov)
* Make pc_max_used_gpa() handle 32-bit phys-bits to avoid qtests breakage as 
part
of maxusedaddr check being generic.
* Fix off-by-one in calculation in pc_max_used_gpa()
* Rename enforce_valid_iova to more explicit name enforce_amd_1tb_hole (Igor 
Mammedov)
* Added David's and Igor's Ack in the last patch (Dr. David Gilbert, Igor 
Mammedov)

Note: This series builds on top of Jonathan Cameron's CXL cleanups
(https://lore.kernel.org/qemu-devel/20220701132300.2264-1-jonathan.came...@huawei.com/).

---

This series lets Qemu spawn i386 guests with >= 1010G with VFIO,
particularly when running on AMD systems with an IOMMU.

Since Linux v5.4, VFIO validates whether the IOVA in the DMA_MAP ioctl is valid
and will return -EINVAL when it is not. On x86, Intel hosts aren't particularly
affected by this extra validation. But AMD systems with IOMMU have a hole at
the 1TB boundary which is *reserved* for HyperTransport I/O addresses located
here: FD_0000_0000h - FF_FFFF_FFFFh. See IOMMU manual [1], specifically
section '2.1.2 IOMMU Logical Topology', Table 3 on what those addresses mean.

VFIO DMA_MAP calls in this IOVA address range fail this check and hence
return -EINVAL, consequently failing the creation of guests bigger than 1010G.
Example of the failure:

qemu-system-x86_64: -device vfio-pci,host=:41:10.1,bootindex=-1: 
VFIO_MAP_DMA: -22
qemu-system-x86_64: -device vfio-pci,host=:41:10.1,bootindex=-1: vfio 
:41:10.1: 
failed to setup container for group 258: memory listener initialization 
failed:
Region pc.ram: vfio_dma_map(0x55ba53e7a9d0, 0x1, 
0xff3000, 0x7ed243e0) = -22 (Invalid argument)

Prior to v5.4, we could map to these IOVAs *but* that's still not the right 
thing
to do and could trigger certain IOMMU events (e.g. INVALID_DEVICE_REQUEST), or
spurious guest VF failures from the resultant IOMMU target abort (see Errata 
1155[2])
as documented on the links down below.

This small series tries to address that by dealing with this AMD-specific 1Tb
hole, but rather than handling it like the 4G hole, it instead relocates RAM
above 4G to be above 1T if the maximum RAM range crosses the HT reserved range.
It is organized as follows:

patch 1: Introduce a @above_4g_mem_start which defaults to 4 GiB as starting
 address of the 4G boundary

patches 2-3: Move pci-host qdev creation to be before pc_memory_init(),
 to get access to pci_hole64_size. The actual pci-host
 initialization is kept as is, only the qdev_new.

patch 4: Small deduplication cleanup that was spread around pc

patches 5-8: Make pc_pci_hole64_start() be callable before pc_memory_init()
 initializes any memory regions. This way, the returned value
 is consistent and we don't need to duplicate the same
 calculations when detecting whether the relocation is needed.

patch 9: Change @above_4g_mem_start to 1TiB if we are on AMD and the max
possible address crosses the HT region. Errors out if the phys-bits is too
low, which is only the case for >=1010G configurations or something that
crosses the HT region.

patch 10: Ensure valid IOVAs only on new machine types, but not older
ones (<= v7.0.0)

The 'consequence' of this approach is that we may need more than the default
phys-bits e.g. a guest with >1010G, will have most of its RAM after the 1TB
address, consequently needing 41 phys-bits as opposed to the default of 40
(TCG_PHYS_ADDR_BITS). Today there's already a precedent to depend on the user to
pick the right value of phys-bits (regardless of this series), so we warn in
case phys-bits aren't enough. Finally, CMOS loses its meaning for the above 4G
ram blocks, but it was mentioned during the RFC that CMOS is only useful for very
old seabios.

Additionally, the reserved region is added to E820 if the relocation is done
or if the phys-bits can cover it.

Alternative options considered (in RFC[0]):

a) Dealing with the 1T hole like the 4G hole -- which also represents what
hardware closely does.

Thanks,
Joao

Older Changelog,

v5[6] -> v6:
* Rebased to latest staging
* Consider @cxl_base setting to also use above_4g_mem_start (Igor Mammedov)
* Use 4 * GiB instead of raw hex (Igor Mammedov)
* Delete @host_type (Igor Mammedov)
* Rename to i440fx_dev to i440fx_host (Igor Mammedov)
* Rebase on top of patch that removes

Re: [PATCH v3 1/2] target/ppc: Move tlbie[l] to decode tree

2022-07-14 Thread Daniel Henrique Barboza





On 7/12/22 16:37, Leandro Lupori wrote:

Also decode RIC, PRS and R operands.

Signed-off-by: Leandro Lupori 
---
  target/ppc/cpu_init.c|  4 +-
  target/ppc/insn32.decode |  8 ++
  target/ppc/translate.c   | 64 +-
  target/ppc/translate/storage-ctrl-impl.c.inc | 87 
  4 files changed, 99 insertions(+), 64 deletions(-)
  create mode 100644 target/ppc/translate/storage-ctrl-impl.c.inc

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index c16cb8dbe7..8d7e77f778 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6368,7 +6368,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
 PPC_FLOAT_EXT |
 PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
 PPC_MEM_SYNC | PPC_MEM_EIEIO |
-   PPC_MEM_TLBSYNC |
+   PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
 PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |
 PPC_SEGMENT_64B | PPC_SLBI |
 PPC_POPCNTB | PPC_POPCNTWD |
@@ -6585,7 +6585,7 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data)
 PPC_FLOAT_EXT |
 PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
 PPC_MEM_SYNC | PPC_MEM_EIEIO |
-   PPC_MEM_TLBSYNC |
+   PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
 PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |
 PPC_SEGMENT_64B | PPC_SLBI |
 PPC_POPCNTB | PPC_POPCNTWD |
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 6ea48d5163..2b985249b8 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -809,3 +809,11 @@ VMODSD  000100 . . . 1001011@VX
  VMODUD  000100 . . . 11011001011@VX
  VMODSQ  000100 . . . 1111011@VX
  VMODUQ  000100 . . . 1101011@VX
+
+## TLB Management Instructions
+
+_tlbierb rs ric prs:bool r:bool
+@X_tlbie.. rs:5 - ric:2 prs:1 r:1 rb:5 .. . _tlbie


You're marking bit 11 as ignored but you're not marking 31 as ignored. The way
the argument patterns are made in this file seems to be either not to mark the
ignored bits (e.g. most of the args from the start of the file) or to mark all
ignored bits (e.g. @XL_S from RFEBB).

I am being petty, yes. This makes no functional change in the instruction, but
I'd rather mark bit 31 as ignored in @X_tlbie as well.

I did that in my tree and it seems to work fine. If you're ok with this change,



Reviewed-by: Daniel Henrique Barboza 




+
+TLBIE   01 . - .. . . . 0100110010 -@X_tlbie
+TLBIEL  01 . - .. . . . 0100010010 -@X_tlbie
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 1d6daa4608..4fcb311c2d 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -5424,64 +5424,6 @@ static void gen_tlbia(DisasContext *ctx)
  #endif  /* defined(CONFIG_USER_ONLY) */
  }
  
-/* tlbiel */

-static void gen_tlbiel(DisasContext *ctx)
-{
-#if defined(CONFIG_USER_ONLY)
-GEN_PRIV;
-#else
-bool psr = (ctx->opcode >> 17) & 0x1;
-
-if (ctx->pr || (!ctx->hv && !psr && ctx->hr)) {
-/*
- * tlbiel is privileged except when PSR=0 and HR=1, making it
- * hypervisor privileged.
- */
-GEN_PRIV;
-}
-
-gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-#endif /* defined(CONFIG_USER_ONLY) */
-}
-
-/* tlbie */
-static void gen_tlbie(DisasContext *ctx)
-{
-#if defined(CONFIG_USER_ONLY)
-GEN_PRIV;
-#else
-bool psr = (ctx->opcode >> 17) & 0x1;
-TCGv_i32 t1;
-
-if (ctx->pr) {
-/* tlbie is privileged... */
-GEN_PRIV;
-} else if (!ctx->hv) {
-if (!ctx->gtse || (!psr && ctx->hr)) {
-/*
- * ... except when GTSE=0 or when PSR=0 and HR=1, making it
- * hypervisor privileged.
- */
-GEN_PRIV;
-}
-}
-
-if (NARROW_MODE(ctx)) {
-TCGv t0 = tcg_temp_new();
-tcg_gen_ext32u_tl(t0, cpu_gpr[rB(ctx->opcode)]);
-gen_helper_tlbie(cpu_env, t0);
-tcg_temp_free(t0);
-} else {
-gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-}
-t1 = tcg_temp_new_i32();
-tcg_gen_ld_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
-tcg_gen_ori_i32(t1, t1, TLB_NEED_GLOBAL_FLUSH);
-tcg_gen_st_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
-tcg_temp_free_i32(t1);
-#endif /* defined(CONFIG_USER_ONLY) */
-}
-
  /* tlbsync */
  static void gen_tlbsync(DisasContext *ctx)
  {
@@ -6699,6 +6641,8 @@ static bool resolve_PLS_D(DisasContext *ctx, arg_D *d, 
arg_PLS_D *a)
  
  #include "translate/branch-impl.c.inc"
  
+#include

[PATCH v7 04/10] i386/pc: factor out above-4g end to an helper

2022-07-14 Thread Joao Martins

There are a couple of places that seem to duplicate this calculation
of RAM size above the 4G boundary. Move all those to a helper function.

Signed-off-by: Joao Martins 
Reviewed-by: Igor Mammedov 
---
 hw/i386/pc.c | 27 ++-
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index e952dc62a12e..216e38da938e 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -814,6 +814,17 @@ void xen_load_linux(PCMachineState *pcms)
 #define PC_ROM_ALIGN   0x800
 #define PC_ROM_SIZE(PC_ROM_MAX - PC_ROM_MIN_VGA)
 
+static hwaddr pc_above_4g_end(PCMachineState *pcms)
+{
+X86MachineState *x86ms = X86_MACHINE(pcms);
+
+if (pcms->sgx_epc.size != 0) {
+return sgx_epc_above_4g_end(>sgx_epc);
+}
+
+return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
+}
+
 void pc_memory_init(PCMachineState *pcms,
 MemoryRegion *system_memory,
 MemoryRegion *rom_memory,
@@ -891,15 +902,8 @@ void pc_memory_init(PCMachineState *pcms,
 exit(EXIT_FAILURE);
 }
 
-if (pcms->sgx_epc.size != 0) {
-machine->device_memory->base = 
sgx_epc_above_4g_end(>sgx_epc);
-} else {
-machine->device_memory->base =
-x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
-}
-
 machine->device_memory->base =
-ROUND_UP(machine->device_memory->base, 1 * GiB);
+ROUND_UP(pc_above_4g_end(pcms), 1 * GiB);
 
 if (pcmc->enforce_aligned_dimm) {
 /* size device region assuming 1G page max alignment per slot */
@@ -929,7 +933,7 @@ void pc_memory_init(PCMachineState *pcms,
 } else if (pcms->sgx_epc.size != 0) {
 cxl_base = sgx_epc_above_4g_end(>sgx_epc);
 } else {
-cxl_base = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
+cxl_base = pc_above_4g_end(pcms);
 }
 
 e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
@@ -1016,7 +1020,6 @@ uint64_t pc_pci_hole64_start(void)
 PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
 PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
 MachineState *ms = MACHINE(pcms);
-X86MachineState *x86ms = X86_MACHINE(pcms);
 uint64_t hole64_start = 0;
 
 if (pcms->cxl_devices_state.host_mr.addr) {
@@ -1034,10 +1037,8 @@ uint64_t pc_pci_hole64_start(void)
 if (!pcmc->broken_reserved_end) {
 hole64_start += memory_region_size(>device_memory->mr);
 }
-} else if (pcms->sgx_epc.size != 0) {
-hole64_start = sgx_epc_above_4g_end(>sgx_epc);
 } else {
-hole64_start = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
+hole64_start = pc_above_4g_end(pcms);
 }
 
 return ROUND_UP(hole64_start, 1 * GiB);
-- 
2.17.2

[PATCH v7 02/10] i386/pc: create pci-host qdev prior to pc_memory_init()

2022-07-14 Thread Joao Martins

At the start of pc_memory_init() we usually pass a range of
0..UINT64_MAX as pci_memory, when really it's 2G (i440fx) or
32G (q35). To get the real user value, we need to get pci-host
passed property for default pci_hole64_size. Thus to get that,
create the qdev prior to memory init to better make estimations
on max used/phys addr.

This is in preparation to determine that host-phys-bits are
enough and also for pci-hole64-size to be considered to relocate
ram-above-4g to be at 1T (on AMD platforms).

Signed-off-by: Joao Martins 
Reviewed-by: Igor Mammedov 
---
 hw/i386/pc_piix.c| 7 +--
 hw/i386/pc_q35.c | 6 +++---
 hw/pci-host/i440fx.c | 5 ++---
 include/hw/pci-host/i440fx.h | 3 ++-
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index a234989ac363..6186a1473755 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -91,6 +91,7 @@ static void pc_init1(MachineState *machine,
 MemoryRegion *pci_memory;
 MemoryRegion *rom_memory;
 ram_addr_t lowmem;
+DeviceState *i440fx_host;
 
 /*
  * Calculate ram split, for memory below and above 4G.  It's a bit
@@ -164,9 +165,11 @@ static void pc_init1(MachineState *machine,
 pci_memory = g_new(MemoryRegion, 1);
 memory_region_init(pci_memory, NULL, "pci", UINT64_MAX);
 rom_memory = pci_memory;
+i440fx_host = qdev_new(host_type);
 } else {
 pci_memory = NULL;
 rom_memory = system_memory;
+i440fx_host = NULL;
 }
 
 pc_guest_info_init(pcms);
@@ -200,8 +203,8 @@ static void pc_init1(MachineState *machine,
 const char *type = xen_enabled() ? TYPE_PIIX3_XEN_DEVICE
  : TYPE_PIIX3_DEVICE;
 
-pci_bus = i440fx_init(host_type,
-  pci_type,
+pci_bus = i440fx_init(pci_type,
+  i440fx_host,
   system_memory, system_io, machine->ram_size,
   x86ms->below_4g_mem_size,
   x86ms->above_4g_mem_size,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index f96cbd04e284..46ea89e564de 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -203,12 +203,12 @@ static void pc_q35_init(MachineState *machine)
 pcms->smbios_entry_point_type);
 }
 
-/* allocate ram and load rom/bios */
-pc_memory_init(pcms, get_system_memory(), rom_memory, _memory);
-
 /* create pci host bus */
 q35_host = Q35_HOST_DEVICE(qdev_new(TYPE_Q35_HOST_DEVICE));
 
+/* allocate ram and load rom/bios */
+pc_memory_init(pcms, get_system_memory(), rom_memory, _memory);
+
 object_property_add_child(qdev_get_machine(), "q35", OBJECT(q35_host));
 object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_RAM_MEM,
  OBJECT(ram_memory), NULL);
diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c
index 1c5ad5f918a2..d5426ef4a53c 100644
--- a/hw/pci-host/i440fx.c
+++ b/hw/pci-host/i440fx.c
@@ -237,7 +237,8 @@ static void i440fx_realize(PCIDevice *dev, Error **errp)
 }
 }
 
-PCIBus *i440fx_init(const char *host_type, const char *pci_type,
+PCIBus *i440fx_init(const char *pci_type,
+DeviceState *dev,
 MemoryRegion *address_space_mem,
 MemoryRegion *address_space_io,
 ram_addr_t ram_size,
@@ -246,7 +247,6 @@ PCIBus *i440fx_init(const char *host_type, const char 
*pci_type,
 MemoryRegion *pci_address_space,
 MemoryRegion *ram_memory)
 {
-DeviceState *dev;
 PCIBus *b;
 PCIDevice *d;
 PCIHostState *s;
@@ -254,7 +254,6 @@ PCIBus *i440fx_init(const char *host_type, const char 
*pci_type,
 unsigned i;
 I440FXState *i440fx;
 
-dev = qdev_new(host_type);
 s = PCI_HOST_BRIDGE(dev);
 b = pci_root_bus_new(dev, NULL, pci_address_space,
  address_space_io, 0, TYPE_PCI_BUS);
diff --git a/include/hw/pci-host/i440fx.h b/include/hw/pci-host/i440fx.h
index 52518dbf08e6..d02bf1ed6b93 100644
--- a/include/hw/pci-host/i440fx.h
+++ b/include/hw/pci-host/i440fx.h
@@ -35,7 +35,8 @@ struct PCII440FXState {
 
 #define TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE "igd-passthrough-i440FX"
 
-PCIBus *i440fx_init(const char *host_type, const char *pci_type,
+PCIBus *i440fx_init(const char *pci_type,
+DeviceState *dev,
 MemoryRegion *address_space_mem,
 MemoryRegion *address_space_io,
 ram_addr_t ram_size,
-- 
2.17.2

[PATCH v7 09/10] i386/pc: relocate 4g start to 1T where applicable

2022-07-14 Thread Joao Martins

It is assumed that the whole GPA space is available to be DMA
addressable, within a given address space limit, except for a
tiny region before the 4G. Since Linux v5.4, VFIO validates
whether the selected GPA is indeed valid i.e. not reserved by
IOMMU on behalf of some specific devices or platform-defined
restrictions, and thus failing the ioctl(VFIO_DMA_MAP) with
 -EINVAL.

AMD systems with an IOMMU are examples of such platforms and
particularly may only have these ranges as allowed:

0000000000000000 - 00000000fedfffff (0      .. 3.982G)
00000000fef00000 - 000000fcffffffff (3.983G .. 1011.9G)
0000010000000000 - ffffffffffffffff (1Tb    .. 16Pb[*])

We already account for the 4G hole, albeit if the guest is big
enough we will fail to allocate a guest with  >1010G due to the
~12G hole at the 1Tb boundary, reserved for HyperTransport (HT).

[*] there is another reserved region unrelated to HT that exists
in the 256T boundary in Fam 17h according to Errata #1286,
documented also in "Open-Source Register Reference for AMD Family
17h Processors (PUB)"

When creating the region above 4G, take into account that on AMD
platforms the HyperTransport range is reserved and hence it
cannot be used either as GPAs. On those cases rather than
establishing the start of ram-above-4g to be 4G, relocate instead
to 1Tb. See AMD IOMMU spec, section 2.1.2 "IOMMU Logical
Topology", for more information on the underlying restriction of
IOVAs.

After accounting for the 1Tb hole on AMD hosts, mtree should
look like:

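0000000000000000-000000007fffffff (prio 0, i/o):
 alias ram-below-4g @pc.ram 0000000000000000-000000007fffffff
0000010000000000-000001ff7fffffff (prio 0, i/o):
 alias ram-above-4g @pc.ram 0000000080000000-000000ffffffffff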

If the relocation is done or the address space covers it, we
also add the reserved HT e820 range as reserved.

Default phys-bits on Qemu is TCG_PHYS_ADDR_BITS (40) which is enough
to address 1Tb (0xff ffff ffff). On AMD platforms, if a
ram-above-4g relocation may be desired and the CPU wasn't configured
with a big enough phys-bits, print an error message to the user
and do not make the relocation of the above-4g-region if phys-bits
is too low.

Suggested-by: Igor Mammedov 
Signed-off-by: Joao Martins 
---
 hw/i386/pc.c | 82 
 1 file changed, 82 insertions(+)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index cda435e3baeb..17613974163e 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -880,6 +880,52 @@ static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
 return start;
 }
 
+static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
+{
+X86CPU *cpu = X86_CPU(first_cpu);
+
+/* 32-bit systems don't have hole64 thus return max CPU address */
+if (cpu->phys_bits <= 32) {
+return ((hwaddr)1 << cpu->phys_bits) - 1;
+}
+
+return pc_pci_hole64_start() + pci_hole64_size - 1;
+}
+
+/*
+ * AMD systems with an IOMMU have an additional hole close to the
+ * 1Tb, which are special GPAs that cannot be DMA mapped. Depending
+ * on kernel version, VFIO may or may not let you DMA map those ranges.
+ * Starting Linux v5.4 we validate it, and can't create guests on AMD machines
+ * with certain memory sizes. It's also wrong to use those IOVA ranges
+ * in detriment of leading to IOMMU INVALID_DEVICE_REQUEST or worse.
+ * The ranges reserved for Hyper-Transport are:
+ *
+ * FD__h - FF__h
+ *
+ * The ranges represent the following:
+ *
+ * Base Address   Top Address  Use
+ *
+ * FD__h FD_F7FF_h Reserved interrupt address space
+ * FD_F800_h FD_F8FF_h Interrupt/EOI IntCtl
+ * FD_F900_h FD_F90F_h Legacy PIC IACK
+ * FD_F910_h FD_F91F_h System Management
+ * FD_F920_h FD_FAFF_h Reserved Page Tables
+ * FD_FB00_h FD_FBFF_h Address Translation
+ * FD_FC00_h FD_FDFF_h I/O Space
+ * FD_FE00_h FD__h Configuration
+ * FE__h FE_1FFF_h Extended Configuration/Device Messages
+ * FE_2000_h FF__h Reserved
+ *
+ * See AMD IOMMU spec, section 2.1.2 "IOMMU Logical Topology",
+ * Table 3: Special Address Controls (GPA) for more information.
+ */
+#define AMD_HT_START 0xfdUL
+#define AMD_HT_END   0xffUL
+#define AMD_ABOVE_1TB_START  (AMD_HT_END + 1)
+#define AMD_HT_SIZE  (AMD_ABOVE_1TB_START - AMD_HT_START)
+
 void pc_memory_init(PCMachineState *pcms,
 MemoryRegion *system_memory,
 MemoryRegion *rom_memory,
@@ -894,13 +940,49 @@ void pc_memory_init(PCMachineState *pcms,
 MachineClass *mc = MACHINE_GET_CLASS(machine);
 PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
 X86MachineState *x86ms = X86_MACHINE(pcms);
+hwaddr maxphysaddr, maxusedaddr;
 hwaddr cxl_base, cxl_resv_end = 0;
+X86CPU *cpu = X86_CPU(first_cpu);
 
 assert(machine->ram_size == x86ms->below_4g_mem_size +

[PATCH v5 7/8] hw/arm: Set drive property for at24c eeprom

2022-07-14 Thread Hao Wu

This patch allows the user to attach an external drive as a property
for an onboard at24c eeprom device. It uses a unit number to
distinguish different devices.

Signed-off-by: Hao Wu 
---
 hw/arm/npcm7xx_boards.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c
index b083b0c572..b8337871ba 100644
--- a/hw/arm/npcm7xx_boards.c
+++ b/hw/arm/npcm7xx_boards.c
@@ -141,11 +141,16 @@ static I2CBus *npcm7xx_i2c_get_bus(NPCM7xxState *soc, 
uint32_t num)
 }
 
 static void at24c_eeprom_init(I2CBus *i2c_bus, int bus, uint8_t addr,
-  uint32_t rsize)
+  uint32_t rsize, int unit)
 {
 I2CSlave *i2c_dev = i2c_slave_new("at24c-eeprom", addr);
 DeviceState *dev = DEVICE(i2c_dev);
+DriveInfo *dinfo;
 
+dinfo = drive_get(IF_OTHER, bus, unit);
+if (dinfo) {
+qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(dinfo));
+}
 qdev_prop_set_uint32(dev, "rom-size", rsize);
 i2c_slave_realize_and_unref(i2c_dev, i2c_bus, _abort);
 }
@@ -252,8 +257,8 @@ static void quanta_gsj_i2c_init(NPCM7xxState *soc)
 i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 3), "tmp105", 0x5c);
 i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 4), "tmp105", 0x5c);
 
-at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 9), 9, 0x55, 8192);
-at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 10), 10, 0x55, 8192);
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 9), 9, 0x55, 8192, 0);
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 10), 10, 0x55, 8192, 1);
 
 /*
  * i2c-11:
@@ -360,7 +365,7 @@ static void kudo_bmc_i2c_init(NPCM7xxState *soc)
 i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 4), TYPE_PCA9548, 0x77);
 
 /* mbfru */
-at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 4), 4, 0x50, 8192);
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 4), 4, 0x50, 8192, 0);
 
 i2c_mux = i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 13),
   TYPE_PCA9548, 0x77);
@@ -372,7 +377,7 @@ static void kudo_bmc_i2c_init(NPCM7xxState *soc)
 i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 5), "tmp105", 0x49);
 
 /* bmcfru */
-at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 14), 14, 0x55, 8192);
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 14), 14, 0x55, 8192, 1);
 
 /* TODO: Add remaining i2c devices. */
 }
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH v5 6/8] hw/arm: npcm8xx_boards: EEPROMs can take bus as parameter

2022-07-14 Thread Hao Wu

We allow at24c_eeprom_init to take an I2CBus* as parameter. This allows
us to attach an EEPROM device behind an I2C mux which is not
possible with the old method.

Signed-off-by: Hao Wu 
---
 hw/arm/npcm7xx_boards.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c
index 6bc6f5d2fe..b083b0c572 100644
--- a/hw/arm/npcm7xx_boards.c
+++ b/hw/arm/npcm7xx_boards.c
@@ -140,10 +140,9 @@ static I2CBus *npcm7xx_i2c_get_bus(NPCM7xxState *soc, 
uint32_t num)
 return I2C_BUS(qdev_get_child_bus(DEVICE(>smbus[num]), "i2c-bus"));
 }
 
-static void at24c_eeprom_init(NPCM7xxState *soc, int bus, uint8_t addr,
+static void at24c_eeprom_init(I2CBus *i2c_bus, int bus, uint8_t addr,
   uint32_t rsize)
 {
-I2CBus *i2c_bus = npcm7xx_i2c_get_bus(soc, bus);
 I2CSlave *i2c_dev = i2c_slave_new("at24c-eeprom", addr);
 DeviceState *dev = DEVICE(i2c_dev);
 
@@ -253,8 +252,8 @@ static void quanta_gsj_i2c_init(NPCM7xxState *soc)
 i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 3), "tmp105", 0x5c);
 i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 4), "tmp105", 0x5c);
 
-at24c_eeprom_init(soc, 9, 0x55, 8192);
-at24c_eeprom_init(soc, 10, 0x55, 8192);
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 9), 9, 0x55, 8192);
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 10), 10, 0x55, 8192);
 
 /*
  * i2c-11:
@@ -360,7 +359,8 @@ static void kudo_bmc_i2c_init(NPCM7xxState *soc)
 
 i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 4), TYPE_PCA9548, 0x77);
 
-at24c_eeprom_init(soc, 4, 0x50, 8192); /* mbfru */
+/* mbfru */
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 4), 4, 0x50, 8192);
 
 i2c_mux = i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 13),
   TYPE_PCA9548, 0x77);
@@ -371,7 +371,8 @@ static void kudo_bmc_i2c_init(NPCM7xxState *soc)
 i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 4), "tmp105", 0x48);
 i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 5), "tmp105", 0x49);
 
-at24c_eeprom_init(soc, 14, 0x55, 8192); /* bmcfru */
+/* bmcfru */
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 14), 14, 0x55, 8192);
 
 /* TODO: Add remaining i2c devices. */
 }
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH v7 03/10] i386/pc: pass pci_hole64_size to pc_memory_init()

2022-07-14 Thread Joao Martins

Use the pre-initialized pci-host qdev and fetch the
pci-hole64-size into pc_memory_init() newly added argument.
Use PCI_HOST_PROP_PCI_HOLE64_SIZE pci-host property for
fetching pci-hole64-size.

This is in preparation to determine that host-phys-bits are
enough and for pci-hole64-size to be considered to relocate
ram-above-4g to be at 1T (on AMD platforms).

Signed-off-by: Joao Martins 
Reviewed-by: Igor Mammedov 
---
 hw/i386/pc.c |  3 ++-
 hw/i386/pc_piix.c|  7 ++-
 hw/i386/pc_q35.c | 10 +-
 include/hw/i386/pc.h |  3 ++-
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 1660684d12fd..e952dc62a12e 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -817,7 +817,8 @@ void xen_load_linux(PCMachineState *pcms)
 void pc_memory_init(PCMachineState *pcms,
 MemoryRegion *system_memory,
 MemoryRegion *rom_memory,
-MemoryRegion **ram_memory)
+MemoryRegion **ram_memory,
+uint64_t pci_hole64_size)
 {
 int linux_boot, i;
 MemoryRegion *option_rom_mr;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 6186a1473755..2a483e8666b4 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -91,6 +91,7 @@ static void pc_init1(MachineState *machine,
 MemoryRegion *pci_memory;
 MemoryRegion *rom_memory;
 ram_addr_t lowmem;
+uint64_t hole64_size;
 DeviceState *i440fx_host;
 
 /*
@@ -166,10 +167,14 @@ static void pc_init1(MachineState *machine,
 memory_region_init(pci_memory, NULL, "pci", UINT64_MAX);
 rom_memory = pci_memory;
 i440fx_host = qdev_new(host_type);
+hole64_size = object_property_get_uint(OBJECT(i440fx_host),
+   PCI_HOST_PROP_PCI_HOLE64_SIZE,
+   _abort);
 } else {
 pci_memory = NULL;
 rom_memory = system_memory;
 i440fx_host = NULL;
+hole64_size = 0;
 }
 
 pc_guest_info_init(pcms);
@@ -186,7 +191,7 @@ static void pc_init1(MachineState *machine,
 /* allocate ram and load rom/bios */
 if (!xen_enabled()) {
 pc_memory_init(pcms, system_memory,
-   rom_memory, _memory);
+   rom_memory, _memory, hole64_size);
 } else {
 pc_system_flash_cleanup_unused(pcms);
 if (machine->kernel_filename != NULL) {
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 46ea89e564de..99ed75371c67 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -138,6 +138,7 @@ static void pc_q35_init(MachineState *machine)
 MachineClass *mc = MACHINE_GET_CLASS(machine);
 bool acpi_pcihp;
 bool keep_pci_slot_hpc;
+uint64_t pci_hole64_size = 0;
 
 /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
  * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
@@ -206,8 +207,15 @@ static void pc_q35_init(MachineState *machine)
 /* create pci host bus */
 q35_host = Q35_HOST_DEVICE(qdev_new(TYPE_Q35_HOST_DEVICE));
 
+if (pcmc->pci_enabled) {
+pci_hole64_size = object_property_get_uint(OBJECT(q35_host),
+   
PCI_HOST_PROP_PCI_HOLE64_SIZE,
+   _abort);
+}
+
 /* allocate ram and load rom/bios */
-pc_memory_init(pcms, get_system_memory(), rom_memory, _memory);
+pc_memory_init(pcms, get_system_memory(), rom_memory, _memory,
+   pci_hole64_size);
 
 object_property_add_child(qdev_get_machine(), "q35", OBJECT(q35_host));
 object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_RAM_MEM,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index b7735dccfc81..568c226d3034 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -159,7 +159,8 @@ void xen_load_linux(PCMachineState *pcms);
 void pc_memory_init(PCMachineState *pcms,
 MemoryRegion *system_memory,
 MemoryRegion *rom_memory,
-MemoryRegion **ram_memory);
+MemoryRegion **ram_memory,
+uint64_t pci_hole64_size);
 uint64_t pc_pci_hole64_start(void);
 DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus);
 void pc_basic_device_init(struct PCMachineState *pcms,
-- 
2.17.2

[PATCH v7 08/10] i386/pc: factor out device_memory base/size to helper

2022-07-14 Thread Joao Martins

Move obtaining hole64_start from device_memory memory region base/size
into a helper alongside corresponding getters in pc_memory_init() when
the hotplug range is uninitialized. While doing that remove the memory
region based logic from this newly added helper.

This is the final step that allows pc_pci_hole64_start() to be callable
at the beginning of pc_memory_init() before any memory regions are
initialized.

Cc: Jonathan Cameron 
Signed-off-by: Joao Martins 
---
 hw/i386/pc.c | 47 ---
 1 file changed, 32 insertions(+), 15 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index c654be6cf0bd..cda435e3baeb 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -825,15 +825,37 @@ static hwaddr pc_above_4g_end(PCMachineState *pcms)
 return x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
 }
 
-static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
+static void pc_get_device_memory_range(PCMachineState *pcms,
+   hwaddr *base,
+   ram_addr_t *device_mem_size)
 {
 PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
 MachineState *machine = MACHINE(pcms);
+ram_addr_t size;
+hwaddr addr;
+
+size = machine->maxram_size - machine->ram_size;
+addr = ROUND_UP(pc_above_4g_end(pcms), 1 * GiB);
+
+if (pcmc->enforce_aligned_dimm) {
+/* size device region assuming 1G page max alignment per slot */
+size += (1 * GiB) * machine->ram_slots;
+}
+
+*base = addr;
+*device_mem_size = size;
+}
+
+
+static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
+{
+PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
 hwaddr cxl_base;
+ram_addr_t size;
 
-if (pcmc->has_reserved_memory && machine->device_memory->base) {
-cxl_base = machine->device_memory->base
-+ memory_region_size(>device_memory->mr);
+if (pcmc->has_reserved_memory) {
+pc_get_device_memory_range(pcms, _base, );
+cxl_base += size;
 } else {
 cxl_base = pc_above_4g_end(pcms);
 }
@@ -920,7 +942,7 @@ void pc_memory_init(PCMachineState *pcms,
 /* initialize device memory address space */
 if (pcmc->has_reserved_memory &&
 (machine->ram_size < machine->maxram_size)) {
-ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size;
+ram_addr_t device_mem_size;
 
 if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) {
 error_report("unsupported amount of memory slots: %"PRIu64,
@@ -935,13 +957,7 @@ void pc_memory_init(PCMachineState *pcms,
 exit(EXIT_FAILURE);
 }
 
-machine->device_memory->base =
-ROUND_UP(pc_above_4g_end(pcms), 1 * GiB);
-
-if (pcmc->enforce_aligned_dimm) {
-/* size device region assuming 1G page max alignment per slot */
-device_mem_size += (1 * GiB) * machine->ram_slots;
-}
+pc_get_device_memory_range(pcms, &machine->device_memory->base, 
&device_mem_size);
 
 if ((machine->device_memory->base + device_mem_size) <
 device_mem_size) {
@@ -1046,13 +1062,14 @@ uint64_t pc_pci_hole64_start(void)
 PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
 MachineState *ms = MACHINE(pcms);
 uint64_t hole64_start = 0;
+ram_addr_t size = 0;
 
 if (pcms->cxl_devices_state.is_enabled) {
 hole64_start = pc_get_cxl_range_end(pcms);
-} else if (pcmc->has_reserved_memory && ms->device_memory->base) {
-hole64_start = ms->device_memory->base;
+} else if (pcmc->has_reserved_memory && (ms->ram_size < ms->maxram_size)) {
+pc_get_device_memory_range(pcms, &hole64_start, &size);
 if (!pcmc->broken_reserved_end) {
-hole64_start += memory_region_size(&ms->device_memory->mr);
+hole64_start += size;
 }
 } else {
 hole64_start = pc_above_4g_end(pcms);
-- 
2.17.2

[PATCH v5 4/8] hw/adc: Make adci[*] R/W in NPCM7XX ADC

2022-07-14 Thread Hao Wu

Our sensor test requires both reading and writing from a sensor's
QOM property. So we need to make the input of ADC module R/W instead
of write only for that to work.

Signed-off-by: Hao Wu 
Reviewed-by: Titus Rwantare 
Reviewed-by: Peter Maydell 
---
 hw/adc/npcm7xx_adc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/adc/npcm7xx_adc.c b/hw/adc/npcm7xx_adc.c
index 47fb9e5f74..bc6f3f55e6 100644
--- a/hw/adc/npcm7xx_adc.c
+++ b/hw/adc/npcm7xx_adc.c
@@ -242,7 +242,7 @@ static void npcm7xx_adc_init(Object *obj)
 
 for (i = 0; i < NPCM7XX_ADC_NUM_INPUTS; ++i) {
 object_property_add_uint32_ptr(obj, "adci[*]",
->adci[i], OBJ_PROP_FLAG_WRITE);
+>adci[i], OBJ_PROP_FLAG_READWRITE);
 }
 object_property_add_uint32_ptr(obj, "vref",
 >vref, OBJ_PROP_FLAG_WRITE);
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH v7 01/10] hw/i386: add 4g boundary start to X86MachineState

2022-07-14 Thread Joao Martins

Rather than hardcoding the 4G boundary everywhere, introduce a
X86MachineState field @above_4g_mem_start and use it
accordingly.

This is in preparation for relocating ram-above-4g to
dynamically start at 1T on AMD platforms.

Signed-off-by: Joao Martins 
Reviewed-by: Igor Mammedov 
---
 hw/i386/acpi-build.c  |  2 +-
 hw/i386/pc.c  | 11 ++-
 hw/i386/sgx.c |  2 +-
 hw/i386/x86.c |  1 +
 include/hw/i386/x86.h |  3 +++
 5 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index cad6f5ac41e9..0355bd3ddaad 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2024,7 +2024,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
MachineState *machine)
 build_srat_memory(table_data, mem_base, mem_len, i - 1,
   MEM_AFFINITY_ENABLED);
 }
-mem_base = 1ULL << 32;
+mem_base = x86ms->above_4g_mem_start;
 mem_len = next_base - x86ms->below_4g_mem_size;
 next_base = mem_base + mem_len;
 }
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 8d68295fdaff..1660684d12fd 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -850,9 +850,10 @@ void pc_memory_init(PCMachineState *pcms,
  machine->ram,
  x86ms->below_4g_mem_size,
  x86ms->above_4g_mem_size);
-memory_region_add_subregion(system_memory, 0x1ULL,
+memory_region_add_subregion(system_memory, x86ms->above_4g_mem_start,
 ram_above_4g);
-e820_add_entry(0x1ULL, x86ms->above_4g_mem_size, E820_RAM);
+e820_add_entry(x86ms->above_4g_mem_start, x86ms->above_4g_mem_size,
+   E820_RAM);
 }
 
 if (pcms->sgx_epc.size != 0) {
@@ -893,7 +894,7 @@ void pc_memory_init(PCMachineState *pcms,
 machine->device_memory->base = 
sgx_epc_above_4g_end(>sgx_epc);
 } else {
 machine->device_memory->base =
-0x1ULL + x86ms->above_4g_mem_size;
+x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
 }
 
 machine->device_memory->base =
@@ -927,7 +928,7 @@ void pc_memory_init(PCMachineState *pcms,
 } else if (pcms->sgx_epc.size != 0) {
 cxl_base = sgx_epc_above_4g_end(>sgx_epc);
 } else {
-cxl_base = 0x1ULL + x86ms->above_4g_mem_size;
+cxl_base = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
 }
 
 e820_add_entry(cxl_base, cxl_size, E820_RESERVED);
@@ -1035,7 +1036,7 @@ uint64_t pc_pci_hole64_start(void)
 } else if (pcms->sgx_epc.size != 0) {
 hole64_start = sgx_epc_above_4g_end(>sgx_epc);
 } else {
-hole64_start = 0x1ULL + x86ms->above_4g_mem_size;
+hole64_start = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
 }
 
 return ROUND_UP(hole64_start, 1 * GiB);
diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c
index a44d66ba2afc..09d9c7c73d9f 100644
--- a/hw/i386/sgx.c
+++ b/hw/i386/sgx.c
@@ -295,7 +295,7 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms)
 return;
 }
 
-sgx_epc->base = 0x1ULL + x86ms->above_4g_mem_size;
+sgx_epc->base = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
 
 memory_region_init(_epc->mr, OBJECT(pcms), "sgx-epc", UINT64_MAX);
 memory_region_add_subregion(get_system_memory(), sgx_epc->base,
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 6003b4b2dfea..029264c54fe2 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1373,6 +1373,7 @@ static void x86_machine_initfn(Object *obj)
 x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
 x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
 x86ms->bus_lock_ratelimit = 0;
+x86ms->above_4g_mem_start = 4 * GiB;
 }
 
 static void x86_machine_class_init(ObjectClass *oc, void *data)
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index 9089bdd99c3a..df82c5fd4252 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -56,6 +56,9 @@ struct X86MachineState {
 /* RAM information (sizes, addresses, configuration): */
 ram_addr_t below_4g_mem_size, above_4g_mem_size;
 
+/* Start address of the initial RAM above 4G */
+uint64_t above_4g_mem_start;
+
 /* CPU and apic information: */
 bool apic_xrupt_override;
 unsigned pci_irq_mask;
-- 
2.17.2

[PATCH v5 2/8] hw/i2c: Read FIFO during RXF_CTL change in NPCM7XX SMBus

2022-07-14 Thread Hao Wu

Originally we read in from SMBus when RXF_STS is cleared. However,
the driver clears RXF_STS before setting RXF_CTL, causing the SMBus
module to read an incorrect number of bytes in FIFO mode when the number
of bytes to read has changed. This patch fixes this issue.

Signed-off-by: Hao Wu 
Reviewed-by: Titus Rwantare 
Acked-by: Corey Minyard 
---
 hw/i2c/npcm7xx_smbus.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/i2c/npcm7xx_smbus.c b/hw/i2c/npcm7xx_smbus.c
index f18e311556..1435daea94 100644
--- a/hw/i2c/npcm7xx_smbus.c
+++ b/hw/i2c/npcm7xx_smbus.c
@@ -637,9 +637,6 @@ static void npcm7xx_smbus_write_rxf_sts(NPCM7xxSMBusState 
*s, uint8_t value)
 {
 if (value & NPCM7XX_SMBRXF_STS_RX_THST) {
 s->rxf_sts &= ~NPCM7XX_SMBRXF_STS_RX_THST;
-if (s->status == NPCM7XX_SMBUS_STATUS_RECEIVING) {
-npcm7xx_smbus_recv_fifo(s);
-}
 }
 }
 
@@ -651,6 +648,9 @@ static void npcm7xx_smbus_write_rxf_ctl(NPCM7xxSMBusState 
*s, uint8_t value)
 new_ctl = KEEP_OLD_BIT(s->rxf_ctl, new_ctl, NPCM7XX_SMBRXF_CTL_LAST);
 }
 s->rxf_ctl = new_ctl;
+if (s->status == NPCM7XX_SMBUS_STATUS_RECEIVING) {
+npcm7xx_smbus_recv_fifo(s);
+}
 }
 
 static uint64_t npcm7xx_smbus_read(void *opaque, hwaddr offset, unsigned size)
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH v7 07/10] i386/pc: handle unitialized mr in pc_get_cxl_range_end()

2022-07-14 Thread Joao Martins

Remove pc_get_cxl_range_end() dependency on the CXL memory region,
and replace it with one that does not require the CXL host_mr to determine
the start of the CXL range.

This is in preparation to allow pc_pci_hole64_start() to be called early
in pc_memory_init(): handle the CXL memory region end when its underlying
memory region isn't yet initialized.

Cc: Jonathan Cameron 
Signed-off-by: Joao Martins 
---
 hw/i386/pc.c | 18 --
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 3fdcab4bb4f3..c654be6cf0bd 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -843,17 +843,15 @@ static uint64_t pc_get_cxl_range_start(PCMachineState 
*pcms)
 
 static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
 {
-uint64_t start = 0;
+uint64_t start = pc_get_cxl_range_start(pcms) + MiB;
 
-if (pcms->cxl_devices_state.host_mr.addr) {
-start = pcms->cxl_devices_state.host_mr.addr +
-memory_region_size(>cxl_devices_state.host_mr);
-if (pcms->cxl_devices_state.fixed_windows) {
-GList *it;
-for (it = pcms->cxl_devices_state.fixed_windows; it; it = 
it->next) {
-CXLFixedWindow *fw = it->data;
-start = fw->mr.addr + memory_region_size(>mr);
-}
+if (pcms->cxl_devices_state.fixed_windows) {
+GList *it;
+
+start = ROUND_UP(start, 256 * MiB);
+for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
+CXLFixedWindow *fw = it->data;
+start += fw->size;
 }
 }
 
-- 
2.17.2

[PATCH v5 3/8] hw/adc: Fix CONV bit in NPCM7XX ADC CON register

2022-07-14 Thread Hao Wu

The correct bit for the CONV bit in NPCM7XX ADC is bit 13. This patch
fixes that in the module, and also lowers the IRQ when the guest
is done handling an interrupt event from the ADC module.

Signed-off-by: Hao Wu 
Reviewed-by: Patrick Venture
Reviewed-by: Peter Maydell 
---
 hw/adc/npcm7xx_adc.c   | 2 +-
 tests/qtest/npcm7xx_adc-test.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/adc/npcm7xx_adc.c b/hw/adc/npcm7xx_adc.c
index 0f0a9f63e2..47fb9e5f74 100644
--- a/hw/adc/npcm7xx_adc.c
+++ b/hw/adc/npcm7xx_adc.c
@@ -36,7 +36,7 @@ REG32(NPCM7XX_ADC_DATA, 0x4)
 #define NPCM7XX_ADC_CON_INT BIT(18)
 #define NPCM7XX_ADC_CON_EN  BIT(17)
 #define NPCM7XX_ADC_CON_RST BIT(16)
-#define NPCM7XX_ADC_CON_CONVBIT(14)
+#define NPCM7XX_ADC_CON_CONVBIT(13)
 #define NPCM7XX_ADC_CON_DIV(rv) extract32(rv, 1, 8)
 
 #define NPCM7XX_ADC_MAX_RESULT  1023
diff --git a/tests/qtest/npcm7xx_adc-test.c b/tests/qtest/npcm7xx_adc-test.c
index 3fa6d9ece0..8048044d28 100644
--- a/tests/qtest/npcm7xx_adc-test.c
+++ b/tests/qtest/npcm7xx_adc-test.c
@@ -50,7 +50,7 @@
 #define CON_INT BIT(18)
 #define CON_EN  BIT(17)
 #define CON_RST BIT(16)
-#define CON_CONVBIT(14)
+#define CON_CONVBIT(13)
 #define CON_DIV(rv) extract32(rv, 1, 8)
 
 #define FST_RDSTBIT(1)
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH v5 8/8] hw/arm: quanta-gbs-bmc add i2c devices

2022-07-14 Thread Hao Wu

From: Patrick Venture 

Adds supported i2c devices to the quanta-gbs-bmc board.

Signed-off-by: Patrick Venture 
Reviewed-by: Hao Wu 
---
 hw/arm/npcm7xx_boards.c | 82 -
 1 file changed, 49 insertions(+), 33 deletions(-)

diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c
index b8337871ba..4bae5589f0 100644
--- a/hw/arm/npcm7xx_boards.c
+++ b/hw/arm/npcm7xx_boards.c
@@ -290,10 +290,12 @@ static void quanta_gsj_fan_init(NPCM7xxMachine *machine, 
NPCM7xxState *soc)
 
 static void quanta_gbs_i2c_init(NPCM7xxState *soc)
 {
+I2CSlave *i2c_mux;
+
+/* i2c-0: */
+i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 0), TYPE_PCA9546, 0x71);
+
 /*
- * i2c-0:
- * pca9546@71
- *
  * i2c-1:
  * pca9535@24
  * pca9535@20
@@ -302,46 +304,60 @@ static void quanta_gbs_i2c_init(NPCM7xxState *soc)
  * pca9535@23
  * pca9535@25
  * pca9535@26
- *
- * i2c-2:
- * sbtsi@4c
- *
- * i2c-5:
- * atmel,24c64@50 mb_fru
- * pca9546@71
- * - channel 0: max31725@54
- * - channel 1: max31725@55
- * - channel 2: max31725@5d
- *  atmel,24c64@51 fan_fru
- * - channel 3: atmel,24c64@52 hsbp_fru
- *
+ */
+
+/* i2c-2: sbtsi@4c */
+
+/* i2c-5: */
+/* mb_fru */
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 5), 5, 0x50, 8192, 0);
+i2c_mux = i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 5),
+  TYPE_PCA9546, 0x71);
+/* max31725 is tmp105 compatible. */
+i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 0), "tmp105", 0x54);
+i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1), "tmp105", 0x55);
+i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 2), "tmp105", 0x5d);
+/* fan_fru */
+at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 2), 5, 0x51, 8192, 1);
+/* hsbp_fru */
+at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 3), 5, 0x52, 8192, 2);
+
+/*
  * i2c-6:
  * pca9545@73
  *
  * i2c-7:
  * pca9545@72
- *
- * i2c-8:
- * adi,adm1272@10
- *
- * i2c-9:
- * pca9546@71
- * - channel 0: isil,isl68137@60
- * - channel 1: isil,isl68137@61
- * - channel 2: isil,isl68137@63
- * - channel 3: isil,isl68137@45
- *
+ */
+
+/* i2c-8: */
+i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 8), "adm1272", 0x10);
+
+/* i2c-9: */
+i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 9), TYPE_PCA9546, 0x71);
+/*
+ * - channel 0: isil,isl68137@60
+ * - channel 1: isil,isl68137@61
+ * - channel 2: isil,isl68137@63
+ * - channel 3: isil,isl68137@45
+ */
+
+/*
  * i2c-10:
  * pca9545@71
  *
  * i2c-11:
  * pca9545@76
- *
- * i2c-12:
- * maxim,max34451@4e
- * isil,isl68137@5d
- * isil,isl68137@5e
- *
+ */
+
+/* i2c-12: */
+i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 12), "max34451", 0x4e);
+/*
+ * isil,isl68137@5d
+ * isil,isl68137@5e
+ */
+
+/*
  * i2c-14:
  * pca9545@70
  */
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH v5 1/8] hw/i2c: Clear ACK bit in NPCM7xx SMBus module

2022-07-14 Thread Hao Wu

The ACK bit in NPCM7XX SMBus module should be cleared each time it
sends out a NACK signal. This patch fixes the bug that it fails to
do so.

Signed-off-by: Hao Wu 
Reviewed-by: Titus Rwantare 
Reviewed-by: Peter Maydell 
---
 hw/i2c/npcm7xx_smbus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i2c/npcm7xx_smbus.c b/hw/i2c/npcm7xx_smbus.c
index e7e0ba66fe..f18e311556 100644
--- a/hw/i2c/npcm7xx_smbus.c
+++ b/hw/i2c/npcm7xx_smbus.c
@@ -270,7 +270,7 @@ static void npcm7xx_smbus_recv_byte(NPCM7xxSMBusState *s)
 if (s->st & NPCM7XX_SMBCTL1_ACK) {
 trace_npcm7xx_smbus_nack(DEVICE(s)->canonical_path);
 i2c_nack(s->bus);
-s->st &= NPCM7XX_SMBCTL1_ACK;
+s->st &= ~NPCM7XX_SMBCTL1_ACK;
 }
 trace_npcm7xx_smbus_recv_byte((DEVICE(s)->canonical_path), s->sda);
 npcm7xx_smbus_update_irq(s);
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH v5 5/8] blockdev: Add a new IF type IF_OTHER

2022-07-14 Thread Hao Wu

This type is used to represent block devs that are not suitable to
be represented by other existing types.

A sample use is to represent an at24c eeprom device defined in
hw/nvram/eeprom_at24c.c. The block device can be used to contain the
content of the said eeprom device.

Signed-off-by: Hao Wu 
---
 blockdev.c| 4 +++-
 include/sysemu/blockdev.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index 9230888e34..befd69ac5f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -82,6 +82,7 @@ static const char *const if_name[IF_COUNT] = {
 [IF_MTD] = "mtd",
 [IF_SD] = "sd",
 [IF_VIRTIO] = "virtio",
+[IF_OTHER] = "other",
 [IF_XEN] = "xen",
 };
 
@@ -726,7 +727,8 @@ QemuOptsList qemu_legacy_drive_opts = {
 },{
 .name = "if",
 .type = QEMU_OPT_STRING,
-.help = "interface (ide, scsi, sd, mtd, floppy, pflash, virtio)",
+.help = "interface (ide, scsi, sd, mtd, floppy, pflash, virtio,"
+" other)",
 },{
 .name = "file",
 .type = QEMU_OPT_STRING,
diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
index 3211b16513..d9dd5af291 100644
--- a/include/sysemu/blockdev.h
+++ b/include/sysemu/blockdev.h
@@ -21,6 +21,7 @@ typedef enum {
  */
 IF_NONE = 0,
 IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO, IF_XEN,
+IF_OTHER,
 IF_COUNT
 } BlockInterfaceType;
 
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH v5 0/8] Misc NPCM7XX patches

2022-07-14 Thread Hao Wu

[NOTE: I'm reviving a bunch of patches that were in the process of
upstreaming a while ago but paused.]

This patch set contains a few bug fixes and I2C devices for some
NPCM7XX boards.

Patch 1~2 fix a problem that causes the SMBus module to behave
incorrectly when it's in FIFO mode and trying to receive more than
16 bytes at a time.

Patch 3 fixes an error in a register for the ADC module.

Patch 4 makes the ADC input to be R/W instead of write only. It allows
a test system to read these via QMP and has no negative effect.

Patch 5 adds a new blockdev IF type IF_OTHER.

Patch 6 allows at24c_eeprom_init to take a bus as parameter so it can
be used by more use cases (e.g. behind an I2C mux.)

Patch 7 allows at24c_eeprom_init to take a drive as property, similar
to sdhci_attach_device().

Patch 8 uses the function defined in patch 5 to add the EEPROM and other
I2C devices for Quanta GBS board.

-- Changes since v4:
1. Add comments to patch 5.
2. Split patch 6 into 2 patches according to the feedback.  Each patch does its 
own task.

-- Changes since v3:
1. Add a new blockdev IF type IF_OTHER.
2. Use IF_OTHER instead of IF_NONE.

-- Changes since v2:
1. Dropped patch 7.
2. Drop an extra variable in patch 5.

-- Changes since v1:
1. Rewrote patch 5 to implement the function in NPCM7xx board file instead
   of the EEPROM device file.
2. Slightly modify patch 6 to adapt to the changes and QEMU comment style.
3. Squash patch 7 into patch 5 to make it compile.
4. Add a new patch 7.

Hao Wu (7):
  hw/i2c: Clear ACK bit in NPCM7xx SMBus module
  hw/i2c: Read FIFO during RXF_CTL change in NPCM7XX SMBus
  hw/adc: Fix CONV bit in NPCM7XX ADC CON register
  hw/adc: Make adci[*] R/W in NPCM7XX ADC
  blockdev: Add a new IF type IF_OTHER
  hw/arm: npcm8xx_boards: EEPROMs can take bus as parameter
  hw/arm: Set drive property for at24c eeprom

Patrick Venture (1):
  hw/arm: quanta-gbs-bmc add i2c devices

 blockdev.c |   4 +-
 hw/adc/npcm7xx_adc.c   |   4 +-
 hw/arm/npcm7xx_boards.c| 102 -
 hw/i2c/npcm7xx_smbus.c |   8 +--
 include/sysemu/blockdev.h  |   1 +
 tests/qtest/npcm7xx_adc-test.c |   2 +-
 6 files changed, 73 insertions(+), 48 deletions(-)

-- 
2.37.0.170.g444d1eabd0-goog

[PATCH 3/8] hw/adc: Fix CONV bit in NPCM7XX ADC CON register

2022-07-14 Thread Hao Wu

The correct bit for the CONV bit in NPCM7XX ADC is bit 13. This patch
fixes that in the module, and also lowers the IRQ when the guest
is done handling an interrupt event from the ADC module.

Signed-off-by: Hao Wu 
Reviewed-by: Patrick Venture
Reviewed-by: Peter Maydell 
---
 hw/adc/npcm7xx_adc.c   | 2 +-
 tests/qtest/npcm7xx_adc-test.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/adc/npcm7xx_adc.c b/hw/adc/npcm7xx_adc.c
index 0f0a9f63e2..47fb9e5f74 100644
--- a/hw/adc/npcm7xx_adc.c
+++ b/hw/adc/npcm7xx_adc.c
@@ -36,7 +36,7 @@ REG32(NPCM7XX_ADC_DATA, 0x4)
 #define NPCM7XX_ADC_CON_INT BIT(18)
 #define NPCM7XX_ADC_CON_EN  BIT(17)
 #define NPCM7XX_ADC_CON_RST BIT(16)
-#define NPCM7XX_ADC_CON_CONVBIT(14)
+#define NPCM7XX_ADC_CON_CONVBIT(13)
 #define NPCM7XX_ADC_CON_DIV(rv) extract32(rv, 1, 8)
 
 #define NPCM7XX_ADC_MAX_RESULT  1023
diff --git a/tests/qtest/npcm7xx_adc-test.c b/tests/qtest/npcm7xx_adc-test.c
index 3fa6d9ece0..8048044d28 100644
--- a/tests/qtest/npcm7xx_adc-test.c
+++ b/tests/qtest/npcm7xx_adc-test.c
@@ -50,7 +50,7 @@
 #define CON_INT BIT(18)
 #define CON_EN  BIT(17)
 #define CON_RST BIT(16)
-#define CON_CONVBIT(14)
+#define CON_CONVBIT(13)
 #define CON_DIV(rv) extract32(rv, 1, 8)
 
 #define FST_RDSTBIT(1)
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH 1/8] hw/i2c: Clear ACK bit in NPCM7xx SMBus module

2022-07-14 Thread Hao Wu

The ACK bit in NPCM7XX SMBus module should be cleared each time it
sends out a NACK signal. This patch fixes the bug that it fails to
do so.

Signed-off-by: Hao Wu 
Reviewed-by: Titus Rwantare 
Reviewed-by: Peter Maydell 
---
 hw/i2c/npcm7xx_smbus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i2c/npcm7xx_smbus.c b/hw/i2c/npcm7xx_smbus.c
index e7e0ba66fe..f18e311556 100644
--- a/hw/i2c/npcm7xx_smbus.c
+++ b/hw/i2c/npcm7xx_smbus.c
@@ -270,7 +270,7 @@ static void npcm7xx_smbus_recv_byte(NPCM7xxSMBusState *s)
 if (s->st & NPCM7XX_SMBCTL1_ACK) {
 trace_npcm7xx_smbus_nack(DEVICE(s)->canonical_path);
 i2c_nack(s->bus);
-s->st &= NPCM7XX_SMBCTL1_ACK;
+s->st &= ~NPCM7XX_SMBCTL1_ACK;
 }
 trace_npcm7xx_smbus_recv_byte((DEVICE(s)->canonical_path), s->sda);
 npcm7xx_smbus_update_irq(s);
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH 5/8] blockdev: Add a new IF type IF_OTHER

2022-07-14 Thread Hao Wu

This type is used to represent block devs that are not suitable to
be represented by other existing types.

A sample use is to represent an at24c eeprom device defined in
hw/nvram/eeprom_at24c.c. The block device can be used to contain the
content of the said eeprom device.

Signed-off-by: Hao Wu 
---
 blockdev.c| 4 +++-
 include/sysemu/blockdev.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index 9230888e34..befd69ac5f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -82,6 +82,7 @@ static const char *const if_name[IF_COUNT] = {
 [IF_MTD] = "mtd",
 [IF_SD] = "sd",
 [IF_VIRTIO] = "virtio",
+[IF_OTHER] = "other",
 [IF_XEN] = "xen",
 };
 
@@ -726,7 +727,8 @@ QemuOptsList qemu_legacy_drive_opts = {
 },{
 .name = "if",
 .type = QEMU_OPT_STRING,
-.help = "interface (ide, scsi, sd, mtd, floppy, pflash, virtio)",
+.help = "interface (ide, scsi, sd, mtd, floppy, pflash, virtio,"
+" other)",
 },{
 .name = "file",
 .type = QEMU_OPT_STRING,
diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
index 3211b16513..d9dd5af291 100644
--- a/include/sysemu/blockdev.h
+++ b/include/sysemu/blockdev.h
@@ -21,6 +21,7 @@ typedef enum {
  */
 IF_NONE = 0,
 IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO, IF_XEN,
+IF_OTHER,
 IF_COUNT
 } BlockInterfaceType;
 
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH 4/8] hw/adc: Make adci[*] R/W in NPCM7XX ADC

2022-07-14 Thread Hao Wu

Our sensor test requires both reading and writing from a sensor's
QOM property. So we need to make the input of ADC module R/W instead
of write only for that to work.

Signed-off-by: Hao Wu 
Reviewed-by: Titus Rwantare 
Reviewed-by: Peter Maydell 
---
 hw/adc/npcm7xx_adc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/adc/npcm7xx_adc.c b/hw/adc/npcm7xx_adc.c
index 47fb9e5f74..bc6f3f55e6 100644
--- a/hw/adc/npcm7xx_adc.c
+++ b/hw/adc/npcm7xx_adc.c
@@ -242,7 +242,7 @@ static void npcm7xx_adc_init(Object *obj)
 
 for (i = 0; i < NPCM7XX_ADC_NUM_INPUTS; ++i) {
 object_property_add_uint32_ptr(obj, "adci[*]",
->adci[i], OBJ_PROP_FLAG_WRITE);
+>adci[i], OBJ_PROP_FLAG_READWRITE);
 }
 object_property_add_uint32_ptr(obj, "vref",
 >vref, OBJ_PROP_FLAG_WRITE);
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH 2/8] hw/i2c: Read FIFO during RXF_CTL change in NPCM7XX SMBus

2022-07-14 Thread Hao Wu

Originally we read in from SMBus when RXF_STS is cleared. However,
the driver clears RXF_STS before setting RXF_CTL, causing the SMBus
module to read an incorrect number of bytes in FIFO mode when the number
of bytes to read has changed. This patch fixes this issue.

Signed-off-by: Hao Wu 
Reviewed-by: Titus Rwantare 
Acked-by: Corey Minyard 
---
 hw/i2c/npcm7xx_smbus.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/i2c/npcm7xx_smbus.c b/hw/i2c/npcm7xx_smbus.c
index f18e311556..1435daea94 100644
--- a/hw/i2c/npcm7xx_smbus.c
+++ b/hw/i2c/npcm7xx_smbus.c
@@ -637,9 +637,6 @@ static void npcm7xx_smbus_write_rxf_sts(NPCM7xxSMBusState 
*s, uint8_t value)
 {
 if (value & NPCM7XX_SMBRXF_STS_RX_THST) {
 s->rxf_sts &= ~NPCM7XX_SMBRXF_STS_RX_THST;
-if (s->status == NPCM7XX_SMBUS_STATUS_RECEIVING) {
-npcm7xx_smbus_recv_fifo(s);
-}
 }
 }
 
@@ -651,6 +648,9 @@ static void npcm7xx_smbus_write_rxf_ctl(NPCM7xxSMBusState 
*s, uint8_t value)
 new_ctl = KEEP_OLD_BIT(s->rxf_ctl, new_ctl, NPCM7XX_SMBRXF_CTL_LAST);
 }
 s->rxf_ctl = new_ctl;
+if (s->status == NPCM7XX_SMBUS_STATUS_RECEIVING) {
+npcm7xx_smbus_recv_fifo(s);
+}
 }
 
 static uint64_t npcm7xx_smbus_read(void *opaque, hwaddr offset, unsigned size)
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH 0/8] Misc NPCM7XX patches

2022-07-14 Thread Hao Wu

[NOTE: I'm reviving a bunch of patches that were in the process of
upstreaming a while ago but paused.]

This patch set contains a few bug fixes and I2C devices for some
NPCM7XX boards.

Patch 1~2 fix a problem that causes the SMBus module to behave
incorrectly when it's in FIFO mode and trying to receive more than
16 bytes at a time.

Patch 3 fixes an error in a register for the ADC module.

Patch 4 makes the ADC input to be R/W instead of write only. It allows
a test system to read these via QMP and has no negative effect.

Patch 5 adds a new blockdev IF type IF_OTHER.

Patch 6 allows at24c_eeprom_init to take a bus as parameter so it can
be used by more use cases (e.g. behind an I2C mux.)

Patch 7 allows at24c_eeprom_init to take a drive as property, similar
to sdhci_attach_device().

Patch 8 uses the function defined in patch 5 to add the EEPROM and other
I2C devices for Quanta GBS board.

-- Changes since v4:
1. Add comments to patch 5.
2. Split patch 6 into 2 patches according to the feedback.  Each patch does its 
own task.

-- Changes since v3:
1. Add a new blockdev IF type IF_OTHER.
2. Use IF_OTHER instead of IF_NONE.

-- Changes since v2:
1. Dropped patch 7.
2. Drop an extra variable in patch 5.

-- Changes since v1:
1. Rewrote patch 5 to implement the function in NPCM7xx board file instead
   of the EEPROM device file.
2. Slightly modify patch 6 to adapt to the changes and QEMU comment style.
3. Squash patch 7 into patch 5 to make it compile.
4. Add a new patch 7.

Hao Wu (7):
  hw/i2c: Clear ACK bit in NPCM7xx SMBus module
  hw/i2c: Read FIFO during RXF_CTL change in NPCM7XX SMBus
  hw/adc: Fix CONV bit in NPCM7XX ADC CON register
  hw/adc: Make adci[*] R/W in NPCM7XX ADC
  blockdev: Add a new IF type IF_OTHER
  hw/arm: npcm8xx_boards: EEPROMs can take bus as parameter
  hw/arm: Set drive property for at24c eeprom

Patrick Venture (1):
  hw/arm: quanta-gbs-bmc add i2c devices

 blockdev.c |   4 +-
 hw/adc/npcm7xx_adc.c   |   4 +-
 hw/arm/npcm7xx_boards.c| 102 -
 hw/i2c/npcm7xx_smbus.c |   8 +--
 include/sysemu/blockdev.h  |   1 +
 tests/qtest/npcm7xx_adc-test.c |   2 +-
 6 files changed, 73 insertions(+), 48 deletions(-)

-- 
2.37.0.170.g444d1eabd0-goog

[RFC] vhost: Move svq avail handler to virtio_net_handle_ctrl

2022-07-14 Thread Eugenio Pérez

DO NOT MERGE THIS RFC

... so we can evaluate whether it is worth moving.

Answering [1].

It basically makes SVQ work in two different modes:
* data virtqueues work with the SVQ loop, intercepting the guest's kicks
and the device's calls.
* CVQ does not move to that. Instead
  - It "forbids" vhost-dev from registering the guest notifier
  - VirtIONet needs to know about SVQ, making it possible to use the
external functions _add, _push...

The idea is good and it would avoid adding callbacks to SVQ, but
VirtIONet should use SVQ in a different way than routing through
vhost-vdpa. Also, we are playing (more) with the guest to host
notifiers, giving vhost devices a new status (started but guest
notifications go to queue handler directly).

SVQ would start working very differently depending on whether we are using
it from the data vqs or the control one, so we need to make it easier to use
correctly. We could start doing that for the next development cycle so
we keep VirtIONet not knowing about SVQ for this one, and have more time
to test.

Providing SVQ to VirtIONet should be easy, probably by adding a
VhostShadowVirtqueue cvq_svq member to VirtIONet.

Comments are welcome, thanks!

[1] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg02640.html
---
 include/hw/virtio/vhost.h |  2 ++
 hw/net/vhost_net.c|  6 +-
 hw/net/virtio-net.c   | 23 ---
 hw/virtio/vhost-vdpa.c|  8 +++-
 hw/virtio/vhost.c |  8 
 net/vhost-vdpa.c  | 22 +++---
 6 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index a346f23d13..634fd1bb25 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -85,6 +85,8 @@ struct vhost_dev {
 int vq_index_end;
 /* if non-zero, minimum required value for max_queues */
 int num_queues;
+/* Disable acquiring the host notifiers */
+bool not_enable_notifiers;
 uint64_t features;
 uint64_t acked_features;
 uint64_t backend_features;
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index ccac5b7a64..70cec99960 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -367,10 +367,14 @@ int vhost_net_start(VirtIODevice *dev, NetClientState 
*ncs,
 for (i = 0; i < nvhosts; i++) {
 if (i < data_queue_pairs) {
 peer = qemu_get_peer(ncs, i);
+net = get_vhost_net(peer);
+net->dev.not_enable_notifiers = false;
 } else {
 peer = qemu_get_peer(ncs, n->max_queue_pairs);
+net = get_vhost_net(peer);
+net->dev.not_enable_notifiers = true;
 }
-r = vhost_net_start_one(get_vhost_net(peer), dev);
+r = vhost_net_start_one(net, dev);
 
 if (r < 0) {
 goto err_start;
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index dd0d056fde..52883b5f0e 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -46,6 +46,7 @@
 #include "net_rx_pkt.h"
 #include "hw/virtio/vhost.h"
 #include "sysemu/qtest.h"
+#include "hw/virtio/vhost-vdpa.h"
 
 #define VIRTIO_NET_VM_VERSION11
 
@@ -1476,17 +1477,33 @@ size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
 return sizeof(status);
 }
 
+typedef struct VhostVDPAState {
+NetClientState nc;
+struct vhost_vdpa vhost_vdpa;
+VHostNetState *vhost_net;
+
+/* Control commands shadow buffers */
+void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
+bool started;
+} VhostVDPAState;
+extern VhostVDPAState *cvq_s;
+int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
+SVQElement *svq_elem, void 
*opaque);
 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
 {
-VirtQueueElement *elem;
+SVQElement *svq_elem;
 
 for (;;) {
 size_t written;
-elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
-if (!elem) {
+svq_elem = virtqueue_pop(vq, sizeof(SVQElement));
+if (!svq_elem) {
 break;
 }
 
+VhostShadowVirtqueue *svq = 
g_ptr_array_index(cvq_s->vhost_vdpa.shadow_vqs, 0);
+vhost_vdpa_net_handle_ctrl_avail(svq, svq_elem, cvq_s);
+
+VirtQueueElement *elem = _elem->elem;
 written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
  elem->out_sg, elem->out_num);
 if (written > 0) {
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 795ed5a049..33aace6e7c 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1222,10 +1222,16 @@ static int vhost_vdpa_set_vring_kick(struct vhost_dev 
*dev,
 {
 struct vhost_vdpa *v = dev->opaque;
 int vdpa_idx = file->index - dev->vq_index;
+struct vhost_vring_file file2 = *file;
+
+if (dev->not_enable_notifiers) {
+/* We don't want to route this ever */
+file2.fd = -1;
+}
 
 if (v->shadow_vqs_enabled) {
 VhostShadowVirtqueue

[PATCH] RISC-V: Allow both Zmmul and M

2022-07-14 Thread Palmer Dabbelt

We got to talking about how Zmmul and M interact with each other
https://github.com/riscv/riscv-isa-manual/issues/869 , and it turns out
that QEMU's behavior is slightly wrong: having Zmmul and M is a legal
combination, it just means that the multiplication instructions are
supported even when M is disabled at runtime via misa.

This just stops overriding M from Zmmul, with that the other checks for
the multiplication instructions work as per the ISA.

Signed-off-by: Palmer Dabbelt 
---
 target/riscv/cpu.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index db2b8e4d30..cab74faaca 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -709,11 +709,6 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 cpu->cfg.ext_ifencei = true;
 }
 
-if (cpu->cfg.ext_m && cpu->cfg.ext_zmmul) {
-warn_report("Zmmul will override M");
-cpu->cfg.ext_m = false;
-}
-
 if (cpu->cfg.ext_i && cpu->cfg.ext_e) {
 error_setg(errp,
"I and E extensions are incompatible");
-- 
2.34.1

[PATCH 8/8] hw/arm: quanta-gbs-bmc add i2c devices

2022-07-14 Thread Hao Wu

From: Patrick Venture 

Adds supported i2c devices to the quanta-gbs-bmc board.

Signed-off-by: Patrick Venture 
Reviewed-by: Hao Wu 
---
 hw/arm/npcm7xx_boards.c | 82 -
 1 file changed, 49 insertions(+), 33 deletions(-)

diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c
index b8337871ba..4bae5589f0 100644
--- a/hw/arm/npcm7xx_boards.c
+++ b/hw/arm/npcm7xx_boards.c
@@ -290,10 +290,12 @@ static void quanta_gsj_fan_init(NPCM7xxMachine *machine, 
NPCM7xxState *soc)
 
 static void quanta_gbs_i2c_init(NPCM7xxState *soc)
 {
+I2CSlave *i2c_mux;
+
+/* i2c-0: */
+i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 0), TYPE_PCA9546, 0x71);
+
 /*
- * i2c-0:
- * pca9546@71
- *
  * i2c-1:
  * pca9535@24
  * pca9535@20
@@ -302,46 +304,60 @@ static void quanta_gbs_i2c_init(NPCM7xxState *soc)
  * pca9535@23
  * pca9535@25
  * pca9535@26
- *
- * i2c-2:
- * sbtsi@4c
- *
- * i2c-5:
- * atmel,24c64@50 mb_fru
- * pca9546@71
- * - channel 0: max31725@54
- * - channel 1: max31725@55
- * - channel 2: max31725@5d
- *  atmel,24c64@51 fan_fru
- * - channel 3: atmel,24c64@52 hsbp_fru
- *
+ */
+
+/* i2c-2: sbtsi@4c */
+
+/* i2c-5: */
+/* mb_fru */
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 5), 5, 0x50, 8192, 0);
+i2c_mux = i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 5),
+  TYPE_PCA9546, 0x71);
+/* max31725 is tmp105 compatible. */
+i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 0), "tmp105", 0x54);
+i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1), "tmp105", 0x55);
+i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 2), "tmp105", 0x5d);
+/* fan_fru */
+at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 2), 5, 0x51, 8192, 1);
+/* hsbp_fru */
+at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 3), 5, 0x52, 8192, 2);
+
+/*
  * i2c-6:
  * pca9545@73
  *
  * i2c-7:
  * pca9545@72
- *
- * i2c-8:
- * adi,adm1272@10
- *
- * i2c-9:
- * pca9546@71
- * - channel 0: isil,isl68137@60
- * - channel 1: isil,isl68137@61
- * - channel 2: isil,isl68137@63
- * - channel 3: isil,isl68137@45
- *
+ */
+
+/* i2c-8: */
+i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 8), "adm1272", 0x10);
+
+/* i2c-9: */
+i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 9), TYPE_PCA9546, 0x71);
+/*
+ * - channel 0: isil,isl68137@60
+ * - channel 1: isil,isl68137@61
+ * - channel 2: isil,isl68137@63
+ * - channel 3: isil,isl68137@45
+ */
+
+/*
  * i2c-10:
  * pca9545@71
  *
  * i2c-11:
  * pca9545@76
- *
- * i2c-12:
- * maxim,max34451@4e
- * isil,isl68137@5d
- * isil,isl68137@5e
- *
+ */
+
+/* i2c-12: */
+i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 12), "max34451", 0x4e);
+/*
+ * isil,isl68137@5d
+ * isil,isl68137@5e
+ */
+
+/*
  * i2c-14:
  * pca9545@70
  */
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH 3/8] hw/adc: Fix CONV bit in NPCM7XX ADC CON register

2022-07-14 Thread Hao Wu

The correct bit for the CONV bit in NPCM7XX ADC is bit 13. This patch
fixes that in the module, and also lowers the IRQ when the guest
is done handling an interrupt event from the ADC module.

Signed-off-by: Hao Wu 
Reviewed-by: Patrick Venture
Reviewed-by: Peter Maydell 
---
 hw/adc/npcm7xx_adc.c   | 2 +-
 tests/qtest/npcm7xx_adc-test.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/adc/npcm7xx_adc.c b/hw/adc/npcm7xx_adc.c
index 0f0a9f63e2..47fb9e5f74 100644
--- a/hw/adc/npcm7xx_adc.c
+++ b/hw/adc/npcm7xx_adc.c
@@ -36,7 +36,7 @@ REG32(NPCM7XX_ADC_DATA, 0x4)
 #define NPCM7XX_ADC_CON_INT BIT(18)
 #define NPCM7XX_ADC_CON_EN  BIT(17)
 #define NPCM7XX_ADC_CON_RST BIT(16)
-#define NPCM7XX_ADC_CON_CONVBIT(14)
+#define NPCM7XX_ADC_CON_CONVBIT(13)
 #define NPCM7XX_ADC_CON_DIV(rv) extract32(rv, 1, 8)
 
 #define NPCM7XX_ADC_MAX_RESULT  1023
diff --git a/tests/qtest/npcm7xx_adc-test.c b/tests/qtest/npcm7xx_adc-test.c
index 3fa6d9ece0..8048044d28 100644
--- a/tests/qtest/npcm7xx_adc-test.c
+++ b/tests/qtest/npcm7xx_adc-test.c
@@ -50,7 +50,7 @@
 #define CON_INT BIT(18)
 #define CON_EN  BIT(17)
 #define CON_RST BIT(16)
-#define CON_CONVBIT(14)
+#define CON_CONVBIT(13)
 #define CON_DIV(rv) extract32(rv, 1, 8)
 
 #define FST_RDSTBIT(1)
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH 7/8] hw/arm: Set drive property for at24c eeprom

2022-07-14 Thread Hao Wu

This patch allows the user to attach an external drive as a property
for an onboard at24c eeprom device. It uses a unit number to
distinguish different devices.

Signed-off-by: Hao Wu 
---
 hw/arm/npcm7xx_boards.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c
index b083b0c572..b8337871ba 100644
--- a/hw/arm/npcm7xx_boards.c
+++ b/hw/arm/npcm7xx_boards.c
@@ -141,11 +141,16 @@ static I2CBus *npcm7xx_i2c_get_bus(NPCM7xxState *soc, 
uint32_t num)
 }
 
 static void at24c_eeprom_init(I2CBus *i2c_bus, int bus, uint8_t addr,
-  uint32_t rsize)
+  uint32_t rsize, int unit)
 {
 I2CSlave *i2c_dev = i2c_slave_new("at24c-eeprom", addr);
 DeviceState *dev = DEVICE(i2c_dev);
+DriveInfo *dinfo;
 
+dinfo = drive_get(IF_OTHER, bus, unit);
+if (dinfo) {
+qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(dinfo));
+}
 qdev_prop_set_uint32(dev, "rom-size", rsize);
 i2c_slave_realize_and_unref(i2c_dev, i2c_bus, &error_abort);
 }
@@ -252,8 +257,8 @@ static void quanta_gsj_i2c_init(NPCM7xxState *soc)
 i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 3), "tmp105", 0x5c);
 i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 4), "tmp105", 0x5c);
 
-at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 9), 9, 0x55, 8192);
-at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 10), 10, 0x55, 8192);
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 9), 9, 0x55, 8192, 0);
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 10), 10, 0x55, 8192, 1);
 
 /*
  * i2c-11:
@@ -360,7 +365,7 @@ static void kudo_bmc_i2c_init(NPCM7xxState *soc)
 i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 4), TYPE_PCA9548, 0x77);
 
 /* mbfru */
-at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 4), 4, 0x50, 8192);
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 4), 4, 0x50, 8192, 0);
 
 i2c_mux = i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 13),
   TYPE_PCA9548, 0x77);
@@ -372,7 +377,7 @@ static void kudo_bmc_i2c_init(NPCM7xxState *soc)
 i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 5), "tmp105", 0x49);
 
 /* bmcfru */
-at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 14), 14, 0x55, 8192);
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 14), 14, 0x55, 8192, 1);
 
 /* TODO: Add remaining i2c devices. */
 }
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH 1/8] hw/i2c: Clear ACK bit in NPCM7xx SMBus module

2022-07-14 Thread Hao Wu

The ACK bit in NPCM7XX SMBus module should be cleared each time it
sends out a NACK signal. This patch fixes the bug that it fails to
do so.

Signed-off-by: Hao Wu 
Reviewed-by: Titus Rwantare 
Reviewed-by: Peter Maydell 
---
 hw/i2c/npcm7xx_smbus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i2c/npcm7xx_smbus.c b/hw/i2c/npcm7xx_smbus.c
index e7e0ba66fe..f18e311556 100644
--- a/hw/i2c/npcm7xx_smbus.c
+++ b/hw/i2c/npcm7xx_smbus.c
@@ -270,7 +270,7 @@ static void npcm7xx_smbus_recv_byte(NPCM7xxSMBusState *s)
 if (s->st & NPCM7XX_SMBCTL1_ACK) {
 trace_npcm7xx_smbus_nack(DEVICE(s)->canonical_path);
 i2c_nack(s->bus);
-s->st &= NPCM7XX_SMBCTL1_ACK;
+s->st &= ~NPCM7XX_SMBCTL1_ACK;
 }
 trace_npcm7xx_smbus_recv_byte((DEVICE(s)->canonical_path), s->sda);
 npcm7xx_smbus_update_irq(s);
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH 4/8] hw/adc: Make adci[*] R/W in NPCM7XX ADC

2022-07-14 Thread Hao Wu

Our sensor test requires both reading and writing from a sensor's
QOM property. So we need to make the input of ADC module R/W instead
of write only for that to work.

Signed-off-by: Hao Wu 
Reviewed-by: Titus Rwantare 
Reviewed-by: Peter Maydell 
---
 hw/adc/npcm7xx_adc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/adc/npcm7xx_adc.c b/hw/adc/npcm7xx_adc.c
index 47fb9e5f74..bc6f3f55e6 100644
--- a/hw/adc/npcm7xx_adc.c
+++ b/hw/adc/npcm7xx_adc.c
@@ -242,7 +242,7 @@ static void npcm7xx_adc_init(Object *obj)
 
 for (i = 0; i < NPCM7XX_ADC_NUM_INPUTS; ++i) {
 object_property_add_uint32_ptr(obj, "adci[*]",
-&s->adci[i], OBJ_PROP_FLAG_WRITE);
+&s->adci[i], OBJ_PROP_FLAG_READWRITE);
 }
 object_property_add_uint32_ptr(obj, "vref",
 &s->vref, OBJ_PROP_FLAG_WRITE);
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH 6/8] hw/arm: npcm8xx_boards: EEPROMs can take bus as parameter

2022-07-14 Thread Hao Wu

We allow at24c_eeprom_init to take an I2CBus* as parameter. This allows
us to attach an EEPROM device behind an I2C mux which is not
possible with the old method.

Signed-off-by: Hao Wu 
---
 hw/arm/npcm7xx_boards.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c
index 6bc6f5d2fe..b083b0c572 100644
--- a/hw/arm/npcm7xx_boards.c
+++ b/hw/arm/npcm7xx_boards.c
@@ -140,10 +140,9 @@ static I2CBus *npcm7xx_i2c_get_bus(NPCM7xxState *soc, 
uint32_t num)
 return I2C_BUS(qdev_get_child_bus(DEVICE(&soc->smbus[num]), "i2c-bus"));
 }
 
-static void at24c_eeprom_init(NPCM7xxState *soc, int bus, uint8_t addr,
+static void at24c_eeprom_init(I2CBus *i2c_bus, int bus, uint8_t addr,
   uint32_t rsize)
 {
-I2CBus *i2c_bus = npcm7xx_i2c_get_bus(soc, bus);
 I2CSlave *i2c_dev = i2c_slave_new("at24c-eeprom", addr);
 DeviceState *dev = DEVICE(i2c_dev);
 
@@ -253,8 +252,8 @@ static void quanta_gsj_i2c_init(NPCM7xxState *soc)
 i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 3), "tmp105", 0x5c);
 i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 4), "tmp105", 0x5c);
 
-at24c_eeprom_init(soc, 9, 0x55, 8192);
-at24c_eeprom_init(soc, 10, 0x55, 8192);
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 9), 9, 0x55, 8192);
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 10), 10, 0x55, 8192);
 
 /*
  * i2c-11:
@@ -360,7 +359,8 @@ static void kudo_bmc_i2c_init(NPCM7xxState *soc)
 
 i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 4), TYPE_PCA9548, 0x77);
 
-at24c_eeprom_init(soc, 4, 0x50, 8192); /* mbfru */
+/* mbfru */
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 4), 4, 0x50, 8192);
 
 i2c_mux = i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 13),
   TYPE_PCA9548, 0x77);
@@ -371,7 +371,8 @@ static void kudo_bmc_i2c_init(NPCM7xxState *soc)
 i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 4), "tmp105", 0x48);
 i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 5), "tmp105", 0x49);
 
-at24c_eeprom_init(soc, 14, 0x55, 8192); /* bmcfru */
+/* bmcfru */
+at24c_eeprom_init(npcm7xx_i2c_get_bus(soc, 14), 14, 0x55, 8192);
 
 /* TODO: Add remaining i2c devices. */
 }
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH 2/8] hw/i2c: Read FIFO during RXF_CTL change in NPCM7XX SMBus

2022-07-14 Thread Hao Wu

Originally we read in from SMBus when RXF_STS is cleared. However,
the driver clears RXF_STS before setting RXF_CTL, causing the SM bus
module to read an incorrect number of bytes in FIFO mode when the number
of bytes read changes. This patch fixes this issue.

Signed-off-by: Hao Wu 
Reviewed-by: Titus Rwantare 
Acked-by: Corey Minyard 
---
 hw/i2c/npcm7xx_smbus.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/i2c/npcm7xx_smbus.c b/hw/i2c/npcm7xx_smbus.c
index f18e311556..1435daea94 100644
--- a/hw/i2c/npcm7xx_smbus.c
+++ b/hw/i2c/npcm7xx_smbus.c
@@ -637,9 +637,6 @@ static void npcm7xx_smbus_write_rxf_sts(NPCM7xxSMBusState 
*s, uint8_t value)
 {
 if (value & NPCM7XX_SMBRXF_STS_RX_THST) {
 s->rxf_sts &= ~NPCM7XX_SMBRXF_STS_RX_THST;
-if (s->status == NPCM7XX_SMBUS_STATUS_RECEIVING) {
-npcm7xx_smbus_recv_fifo(s);
-}
 }
 }
 
@@ -651,6 +648,9 @@ static void npcm7xx_smbus_write_rxf_ctl(NPCM7xxSMBusState 
*s, uint8_t value)
 new_ctl = KEEP_OLD_BIT(s->rxf_ctl, new_ctl, NPCM7XX_SMBRXF_CTL_LAST);
 }
 s->rxf_ctl = new_ctl;
+if (s->status == NPCM7XX_SMBUS_STATUS_RECEIVING) {
+npcm7xx_smbus_recv_fifo(s);
+}
 }
 
 static uint64_t npcm7xx_smbus_read(void *opaque, hwaddr offset, unsigned size)
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH 5/8] blockdev: Add a new IF type IF_OTHER

2022-07-14 Thread Hao Wu

This type is used to represent block devs that are not suitable to
be represented by other existing types.

A sample use is to represent an at24c eeprom device defined in
hw/nvram/eeprom_at24c.c. The block device can be used to contain the
content of the said eeprom device.

Signed-off-by: Hao Wu 
---
 blockdev.c| 4 +++-
 include/sysemu/blockdev.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index 9230888e34..befd69ac5f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -82,6 +82,7 @@ static const char *const if_name[IF_COUNT] = {
 [IF_MTD] = "mtd",
 [IF_SD] = "sd",
 [IF_VIRTIO] = "virtio",
+[IF_OTHER] = "other",
 [IF_XEN] = "xen",
 };
 
@@ -726,7 +727,8 @@ QemuOptsList qemu_legacy_drive_opts = {
 },{
 .name = "if",
 .type = QEMU_OPT_STRING,
-.help = "interface (ide, scsi, sd, mtd, floppy, pflash, virtio)",
+.help = "interface (ide, scsi, sd, mtd, floppy, pflash, virtio,"
+" other)",
 },{
 .name = "file",
 .type = QEMU_OPT_STRING,
diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
index 3211b16513..d9dd5af291 100644
--- a/include/sysemu/blockdev.h
+++ b/include/sysemu/blockdev.h
@@ -21,6 +21,7 @@ typedef enum {
  */
 IF_NONE = 0,
 IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO, IF_XEN,
+IF_OTHER,
 IF_COUNT
 } BlockInterfaceType;
 
-- 
2.37.0.170.g444d1eabd0-goog

[PATCH 0/8] Misc NPCM7XX patches

2022-07-14 Thread Hao Wu

[NOTE: I'm reviving a bunch of patches that were in the process of
upstreaming a while ago but paused.]

This patch set contains a few bug fixes and I2C devices for some
NPCM7XX boards.

Patch 1~2 fix a problem that causes the SMBus module to behave
incorrectly when it's in FIFO mode and trying to receive more than
16 bytes at a time.

Patch 3 fixes an error in a register for ADC module.

Patch 4 makes the ADC input to be R/W instead of write only. It allows
a test system to read these via QMP and has no negative effect.

Patch 5 adds a new blockdev IF type IF_OTHER.

Patch 6 allows at24c_eeprom_init to take a bus as parameter so it can
be used by more use cases (e.g. behind an I2C mux.)

Patch 7 allows at24c_eeprom_init to take a drive as property, similar
to sdhci_attach_device().

Patch 8 uses the function updated in patches 6 and 7 to add the EEPROM and
other I2C devices for Quanta GBS board.

-- Changes since v4:
1. Add comments to patch 5.
2. Split patch 6 into 2 patches according to the feedback.  Each patch does its
own task.

-- Changes since v3:
1. Add a new blockdev IF type IF_OTHER.
2. Use IF_OTHER instead of IF_NONE.

-- Changes since v2:
1. Dropped patch 7.
2. Drop an extra variable in patch 5.

-- Changes since v1:
1. Rewrote patch 5 to implement the function in NPCM7xx board file instead
   of the EEPROM device file.
2. Slightly modify patch 6 to adapt to the changes and QEMU comment style.
3. Squash patch 7 into patch 5 to make it compile.
4. Add a new patch 7.

Hao Wu (7):
  hw/i2c: Clear ACK bit in NPCM7xx SMBus module
  hw/i2c: Read FIFO during RXF_CTL change in NPCM7XX SMBus
  hw/adc: Fix CONV bit in NPCM7XX ADC CON register
  hw/adc: Make adci[*] R/W in NPCM7XX ADC
  blockdev: Add a new IF type IF_OTHER
  hw/arm: npcm8xx_boards: EEPROMs can take bus as parameter
  hw/arm: Set drive property for at24c eeprom

Patrick Venture (1):
  hw/arm: quanta-gbs-bmc add i2c devices

 blockdev.c |   4 +-
 hw/adc/npcm7xx_adc.c   |   4 +-
 hw/arm/npcm7xx_boards.c| 102 -
 hw/i2c/npcm7xx_smbus.c |   8 +--
 include/sysemu/blockdev.h  |   1 +
 tests/qtest/npcm7xx_adc-test.c |   2 +-
 6 files changed, 73 insertions(+), 48 deletions(-)

-- 
2.37.0.170.g444d1eabd0-goog

Re: [PATCH v2 07/11] acpi/tests/bits: add python test that exercizes QEMU bios tables using biosbits

2022-07-14 Thread Ani Sinha

On Thu, Jul 14, 2022 at 19:49 Ani Sinha  wrote:

>
>
> On Thu, 14 Jul 2022, Daniel P. Berrangé wrote:
>
> > On Sun, Jul 10, 2022 at 10:30:10PM +0530, Ani Sinha wrote:
> > > This change adds python based test environment that can be used to run
> pytest
> > > from within a virtual environment. A bash script sets up a virtual
> environment
> > > and then runs the python based tests from within that environment.
> > > All dependent python packages are installed in the virtual environment
> using
> > > pip python module. QEMU python test modules are also available in the
> environment
> > > for spawning the QEMU based VMs.
> > >
> > > It also introduces QEMU acpi/smbios biosbits python test script which
> is run
> > > from within the python virtual environment. When the bios bits tests
> are run,
> > > bios bits binaries are downloaded from an external repo/location.
> > > Currently, the test points to an external private github repo where
> the bits
> > > archives are checked in.
> > >
> > > Signed-off-by: Ani Sinha 
> > > ---
> > >  tests/pytest/acpi-bits/acpi-bits-test-venv.sh |  59 +++
> > >  tests/pytest/acpi-bits/acpi-bits-test.py  | 382 ++
> > >  tests/pytest/acpi-bits/meson.build|  33 ++
> > >  tests/pytest/acpi-bits/requirements.txt   |   1 +
> > >  4 files changed, 475 insertions(+)
> > >  create mode 100644 tests/pytest/acpi-bits/acpi-bits-test-venv.sh
> > >  create mode 100644 tests/pytest/acpi-bits/acpi-bits-test.py
> > >  create mode 100644 tests/pytest/acpi-bits/meson.build
> > >  create mode 100644 tests/pytest/acpi-bits/requirements.txt
> > >
> > > diff --git a/tests/pytest/acpi-bits/acpi-bits-test-venv.sh
> b/tests/pytest/acpi-bits/acpi-bits-test-venv.sh
> > > new file mode 100644
> > > index 00..186395473b
> > > --- /dev/null
> > > +++ b/tests/pytest/acpi-bits/acpi-bits-test-venv.sh
> > > @@ -0,0 +1,59 @@
> > > +#!/usr/bin/env bash
> > > +# Generates a python virtual environment for the test to run.
> > > +# Then runs python test scripts from within that virtual environment.
> > > +#
> > > +# This program is free software; you can redistribute it and/or modify
> > > +# it under the terms of the GNU General Public License as published by
> > > +# the Free Software Foundation; either version 2 of the License, or
> > > +# (at your option) any later version.
> > > +#
> > > +# This program is distributed in the hope that it will be useful,
> > > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > > +# GNU General Public License for more details.
> > > +#
> > > +# You should have received a copy of the GNU General Public License
> > > +# along with this program.  If not, see <http://www.gnu.org/licenses/>.
> > > +#
> > > +# Author: Ani Sinha 
> > > +
> > > +set -e
> > > +
> > > +MYPATH=$(realpath ${BASH_SOURCE:-$0})
> > > +MYDIR=$(dirname $MYPATH)
> > > +
> > > +if [ -z "$PYTEST_SOURCE_ROOT" ]; then
> > > +echo -n "Please set QTEST_SOURCE_ROOT env pointing"
> > > +echo " to the root of the qemu source tree."
> > > +echo -n "This is required so that the test can find the "
> > > +echo "python modules that it needs for execution."
> > > +exit 1
> > > +fi
> > > +SRCDIR=$PYTEST_SOURCE_ROOT
> > > +TESTSCRIPTS=("acpi-bits-test.py")
> > > +PIPCMD="-m pip -q --disable-pip-version-check"
> > > +# we need to save the old value of PWD before we do a change-dir later
> > > +PYTEST_PWD=$PWD
> > > +
> > > +TESTS_PYTHON=/usr/bin/python3
> > > +TESTS_VENV_REQ=requirements.txt
> > > +
> > > +# sadly for pip -e and -t options do not work together.
> > > +# please see https://github.com/pypa/pip/issues/562
> > > +cd $MYDIR
> > > +
> > > +$TESTS_PYTHON -m venv .
> > > +$TESTS_PYTHON $PIPCMD install -e $SRCDIR/python/
> > > +[ -f $TESTS_VENV_REQ ] && \
> > > +$TESTS_PYTHON $PIPCMD install -r $TESTS_VENV_REQ || exit 0
> > > +
> > > +# venv is activated at this point.
> > > +
> > > +# run the test
> > > +for testscript in ${TESTSCRIPTS[@]} ; do
> > > +export PYTEST_PWD; python3 $testscript
> > > +done
> > > +
> > > +cd $PYTEST_PWD
> > > +
> > > +exit 0
> > > diff --git a/tests/pytest/acpi-bits/acpi-bits-test.py
> b/tests/pytest/acpi-bits/acpi-bits-test.py
> > > new file mode 100644
> > > index 00..97e61eb709
> > > --- /dev/null
> > > +++ b/tests/pytest/acpi-bits/acpi-bits-test.py
> > > @@ -0,0 +1,382 @@
> > > +#!/usr/bin/env python3
> > > +# group: rw quick
> > > +# Exercize QEMU generated ACPI/SMBIOS tables using biosbits,
> > > +# https://biosbits.org/
> > > +#
> > > +# This program is free software; you can redistribute it and/or modify
> > > +# it under the terms of the GNU General Public License as published by
> > > +# the Free Software Foundation; either version 2 of the License, or
> > > +# (at your option) any later version.
> > > +#
> > > +# This program is distributed in the hope that it will be useful,
> > > +# but WITHOUT ANY WARRANTY;

Re: [RFC PATCH v9 20/23] vdpa: Buffer CVQ support on shadow virtqueue

2022-07-14 Thread Eugenio Perez Martin

On Thu, Jul 14, 2022 at 9:04 AM Jason Wang  wrote:
>
> On Thu, Jul 14, 2022 at 2:54 PM Eugenio Perez Martin
>  wrote:
> >
> > > > > +static void vhost_vdpa_net_handle_ctrl_used(VhostShadowVirtqueue 
> > > > > *svq,
> > > > > +void *vq_elem_opaque,
> > > > > +uint32_t dev_written)
> > > > > +{
> > > > > +g_autoptr(CVQElement) cvq_elem = vq_elem_opaque;
> > > > > +virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> > > > > +const struct iovec out = {
> > > > > +.iov_base = cvq_elem->out_data,
> > > > > +.iov_len = cvq_elem->out_len,
> > > > > +};
> > > > > +const DMAMap status_map_needle = {
> > > > > +.translated_addr = (hwaddr)(uintptr_t)cvq_elem->in_buf,
> > > > > +.size = sizeof(status),
> > > > > +};
> > > > > +const DMAMap *in_map;
> > > > > +const struct iovec in = {
> > > > > > +.iov_base = &status,
> > > > > +.iov_len = sizeof(status),
> > > > > +};
> > > > > +g_autofree VirtQueueElement *guest_elem = NULL;
> > > > > +
> > > > > +if (unlikely(dev_written < sizeof(status))) {
> > > > > +error_report("Insufficient written data (%llu)",
> > > > > + (long long unsigned)dev_written);
> > > > > +goto out;
> > > > > +}
> > > > > +
> > > > > > +in_map = vhost_iova_tree_find_iova(svq->iova_tree, 
> > > > > > &status_map_needle);
> > > > > +if (unlikely(!in_map)) {
> > > > > +error_report("Cannot locate out mapping");
> > > > > +goto out;
> > > > > +}
> > > > > +
> > > > > +switch (cvq_elem->ctrl.class) {
> > > > > +case VIRTIO_NET_CTRL_MAC_ADDR_SET:
> > > > > +break;
> > > > > +default:
> > > > > +error_report("Unexpected ctrl class %u", 
> > > > > cvq_elem->ctrl.class);
> > > > > +goto out;
> > > > > +};
> > > > > +
> > > > > > +memcpy(&status, cvq_elem->in_buf, sizeof(status));
> > > > > +if (status != VIRTIO_NET_OK) {
> > > > > +goto out;
> > > > > +}
> > > > > +
> > > > > +status = VIRTIO_NET_ERR;
> > > > > > +virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1);
> > > >
> > > >
> > > > I wonder if this is the best choice. It looks to me it might be better
> > > > to extend the virtio_net_handle_ctrl_iov() logic:
> > > >
> > > > virtio_net_handle_ctrl_iov() {
> > > >  if (svq enabled) {
> > > >   host_elem = iov_copy(guest_elem);
> > > >   vhost_svq_add(host_elem);
> > > >   vhost_svq_poll(host_elem);
> > > >  }
> > > > >  // userspace ctrl vq logic
> > > > }
> > > >
> > > >
> > > > This can help to avoid coupling too much logic in cvq (like the
> > > > avail,used and detach ops).
> > > >
> > >
> > > Let me try that way and I'll come back to you.
> > >
> >
> > The problem with that approach is that virtio_net_handle_ctrl_iov is
> > called from the SVQ used handler. How could we call it otherwise? I
> > find it pretty hard to do unless we return SVQ to the model where we
> > used VirtQueue.handle_output, discarded long ago.
>
> I'm not sure I get this. Can we simply let the cvq to be trapped as
> the current userspace datapath did?
>

Sending a very early draft RFC with that method, so we can compare if
it is worth the trouble.

Thanks!

[PATCH v3] target/ppc: check tb_env != 0 before printing TBU/TBL/DECR

2022-07-14 Thread Matheus Ferst

When using "-machine none", env->tb_env is not allocated, causing the
segmentation fault reported in issue #85 (launchpad bug #811683). To
avoid this problem, check if the pointer != NULL before calling the
methods to print TBU/TBL/DECR.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/85
Signed-off-by: Matheus Ferst 
---
v3:
  - Only check env->tb_env in softmmu, linux-user get timebase from
elsewhere. Also, try to make the qemu_fprintf call more readable.
  - Link to v2: 
https://lists.gnu.org/archive/html/qemu-ppc/2022-07/msg00193.html
---
 target/ppc/cpu_init.c | 18 --
 target/ppc/monitor.c  |  9 +
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 86ad28466a..313c8bb300 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7476,17 +7476,15 @@ void ppc_cpu_dump_state(CPUState *cs, FILE *f, int 
flags)
  "%08x iidx %d didx %d\n",
  env->msr, env->spr[SPR_HID0], env->hflags,
  cpu_mmu_index(env, true), cpu_mmu_index(env, false));
-#if !defined(NO_TIMER_DUMP)
-qemu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64
 #if !defined(CONFIG_USER_ONLY)
- " DECR " TARGET_FMT_lu
-#endif
- "\n",
- cpu_ppc_load_tbu(env), cpu_ppc_load_tbl(env)
-#if !defined(CONFIG_USER_ONLY)
- , cpu_ppc_load_decr(env)
-#endif
-);
+if (env->tb_env) {
+qemu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64
+ " DECR " TARGET_FMT_lu "\n", cpu_ppc_load_tbu(env),
+ cpu_ppc_load_tbl(env), cpu_ppc_load_decr(env));
+}
+#else
+qemu_fprintf(f, "TB %08" PRIu32 " %08" PRIu64 "\n", cpu_ppc_load_tbu(env),
+ cpu_ppc_load_tbl(env));
 #endif
 for (i = 0; i < 32; i++) {
 if ((i & (RGPL - 1)) == 0) {
diff --git a/target/ppc/monitor.c b/target/ppc/monitor.c
index 0b805ef6e9..8250b1304e 100644
--- a/target/ppc/monitor.c
+++ b/target/ppc/monitor.c
@@ -55,6 +55,9 @@ static target_long monitor_get_decr(Monitor *mon, const 
struct MonitorDef *md,
 int val)
 {
 CPUArchState *env = mon_get_cpu_env(mon);
+if (!env->tb_env) {
+return 0;
+}
 return cpu_ppc_load_decr(env);
 }
 
@@ -62,6 +65,9 @@ static target_long monitor_get_tbu(Monitor *mon, const struct 
MonitorDef *md,
int val)
 {
 CPUArchState *env = mon_get_cpu_env(mon);
+if (!env->tb_env) {
+return 0;
+}
 return cpu_ppc_load_tbu(env);
 }
 
@@ -69,6 +75,9 @@ static target_long monitor_get_tbl(Monitor *mon, const struct 
MonitorDef *md,
int val)
 {
 CPUArchState *env = mon_get_cpu_env(mon);
+if (!env->tb_env) {
+return 0;
+}
 return cpu_ppc_load_tbl(env);
 }
 
-- 
2.25.1

target/hexagon: Fixed container/CI feedback on idef-parser v9 patchset

2022-07-14 Thread Anton Johansson via


Hi, Alex and Richard,

I implemented the changes you suggested in this conversation:

https://lists.nongnu.org/archive/html/qemu-devel/2022-05/threads.html#04679

The 9th patch, which updated libvirt-ci and added flex/bison to the
CI containers, has now been split into 5 separate patches that:

    1. Update libvirt-ci

    2. Fix renamed packages mappings in qemu.yml

    3. add flex/bison/glib2-static to qemu.yml

    4. regenerate CI containers

    5. manually add flex/bison to containers that build qemu-hexagon,
   but aren't covered by libvirt-ci

The v10 patchset contains these fixes in patches 8-12, and can be found 
here:


https://lists.nongnu.org/archive/html/qemu-devel/2022-06/threads.html#00122

If someone could take a look whenever you have the time, I would greatly 
appreciate it!


Thanks,

--
Anton Johansson,
rev.ng Labs Srl.

Re: [PULL 0/3] Guest Agent patches 2022-07-13

2022-07-14 Thread Peter Maydell

On Wed, 13 Jul 2022 at 11:19, Konstantin Kostiuk  wrote:
>
> The following changes since commit 08c8a31214e8ca29e05b9f6c3ee942b28ec58457:
>
>   Merge tag 'pull-tcg-20220712' of https://gitlab.com/rth7680/qemu into 
> staging (2022-07-12 11:52:11 +0530)
>
> are available in the Git repository at:
>
>   g...@github.com:kostyanf14/qemu.git tags/qga-win32-pull-2022-07-13
>
> for you to fetch changes up to 1db8a0b0ea2fb72ecab36bd3143a9715c083d5d3:
>
>   qga: add command 'guest-get-cpustats' (2022-07-13 12:19:18 +0300)
>
> 
> qga-win32-pull-2022-07-13
>
> 


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/7.1
for any user-visible changes.

-- PMM

[PATCH v2 04/19] vhost: Reorder vhost_svq_kick

2022-07-14 Thread Eugenio Pérez

Future code needs to call it from vhost_svq_add.

No functional change intended.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 28 ++--
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index e2184a4481..fd1839cec5 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -215,6 +215,20 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
 return true;
 }
 
+static void vhost_svq_kick(VhostShadowVirtqueue *svq)
+{
+/*
+ * We need to expose the available array entries before checking the used
+ * flags
+ */
+smp_mb();
+if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
+return;
+}
+
+event_notifier_set(&svq->hdev_kick);
+}
+
 /**
  * Add an element to a SVQ.
  *
@@ -235,20 +249,6 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, 
VirtQueueElement *elem)
 return true;
 }
 
-static void vhost_svq_kick(VhostShadowVirtqueue *svq)
-{
-/*
- * We need to expose the available array entries before checking the used
- * flags
- */
-smp_mb();
-if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
-return;
-}
-
-event_notifier_set(&svq->hdev_kick);
-}
-
 /**
  * Forward available buffers.
  *
-- 
2.31.1

[PATCH 10/11] target/arm: Make SPSR_hyp accessible for Cortex-R52

2022-07-14 Thread Tobias Roehmel

From: Tobias Röhmel 

The Cortex-R52 can access SPSR_hyp from hypervisor mode
as discussed here: https://github.com/zephyrproject-rtos/zephyr/issues/47330

Signed-off-by: Tobias Röhmel 
---
 target/arm/op_helper.c | 8 
 target/arm/translate.c | 5 +++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index c5bde1cfcc..aa019bc39d 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -522,6 +522,11 @@ static void msr_mrs_banked_exc_checks(CPUARMState *env, 
uint32_t tgtmode,
 return;
 }
 
+if (curmode == ARM_CPU_MODE_HYP && tgtmode == ARM_CPU_MODE_HYP
+&& arm_feature(env, ARM_FEATURE_V8_R)) {
+return;
+}
+
 if (curmode == tgtmode) {
 goto undef;
 }
@@ -570,6 +575,9 @@ void HELPER(msr_banked)(CPUARMState *env, uint32_t value, 
uint32_t tgtmode,
 switch (regno) {
 case 16: /* SPSRs */
 env->banked_spsr[bank_number(tgtmode)] = value;
+if (arm_feature(env, ARM_FEATURE_V8_R)) {
+env->spsr = value;
+}
 break;
 case 17: /* ELR_Hyp */
 env->elr_el[2] = value;
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 6617de775f..c097f7e417 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -2881,8 +2881,9 @@ static bool msr_banked_access_decode(DisasContext *s, int 
r, int sysm, int rn,
  * can be accessed also from Hyp mode, so forbid accesses from
  * EL0 or EL1.
  */
-if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
-(s->current_el < 3 && *regno != 17)) {
+if (!arm_dc_feature(s, ARM_FEATURE_V8_R)
+&& (!arm_dc_feature(s, ARM_FEATURE_EL2)
+|| s->current_el < 2 || (s->current_el < 3 && *regno != 17))) {
 goto undef;
 }
 break;
-- 
2.25.1

Re: [PATCH 10/11] target/arm: Make SPSR_hyp accessible for Cortex-R52

2022-07-14 Thread Peter Maydell

On Thu, 14 Jul 2022 at 16:46, Peter Maydell  wrote:
>
> On Thu, 14 Jul 2022 at 15:54, Tobias Roehmel  wrote:
> >
> > From: Tobias Röhmel 
> >
> > The Cortex-R52 can access SPSR_hyp from hypervisor mode
> > as discussed here: https://github.com/zephyrproject-rtos/zephyr/issues/47330
>
> The v8R Supplement pseudocode aarch32/functions/system/SPSRaccessValid
> says this is UNPREDICTABLE:
>
> when '0' // SPSR_hyp
> if !HaveEL(EL2) || mode != M32_Monitor then UNPREDICTABLE;
>
> so unless that's incorrect then I think QEMU is within its rights
> to UNDEF this (and real hardware may choose to UNDEF or not).
>
> I will enquire about whether there is a bug in the sample R52
> startup code.

The author of the startup code example agrees it's a bug that went
unnoticed because the FVP happens to permit this UNPREDICTABLE case,
so we don't need this QEMU patch.

-- PMM

[PATCH 05/11] target/arm: Make stage_2_format for cache attributes optional

2022-07-14 Thread Tobias Roehmel

From: Tobias Röhmel 

The Cortex-R52 has a 2 stage MPU translation process but doesn't have the 
FEAT_S2FWB feature. This makes it necessary to allow for the old cache 
attribute combination.
This is facilitated by changing the control path of combine_cacheattrs instead 
of failing if the second cache attributes struct is not in that format.

Signed-off-by: Tobias Röhmel 
---
 target/arm/ptw.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index 4d97a24808..8b037c1f55 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -2108,7 +2108,11 @@ static uint8_t combined_attrs_nofwb(CPUARMState *env,
 {
 uint8_t s1lo, s2lo, s1hi, s2hi, s2_mair_attrs, ret_attrs;
 
-s2_mair_attrs = convert_stage2_attrs(env, s2.attrs);
+if (s2.is_s2_format) {
+s2_mair_attrs = convert_stage2_attrs(env, s2.attrs);
+} else {
+s2_mair_attrs = s2.attrs;
+}
 
 s1lo = extract32(s1.attrs, 0, 4);
 s2lo = extract32(s2_mair_attrs, 0, 4);
@@ -2166,6 +2170,8 @@ static uint8_t force_cacheattr_nibble_wb(uint8_t attr)
 static uint8_t combined_attrs_fwb(CPUARMState *env,
   ARMCacheAttrs s1, ARMCacheAttrs s2)
 {
+assert(s2.is_s2_format && !s1.is_s2_format);
+
 switch (s2.attrs) {
 case 7:
 /* Use stage 1 attributes */
@@ -2215,7 +2221,6 @@ static ARMCacheAttrs combine_cacheattrs(CPUARMState *env,
 ARMCacheAttrs ret;
 bool tagged = false;
 
-assert(s2.is_s2_format && !s1.is_s2_format);
 ret.is_s2_format = false;
 
 if (s1.attrs == 0xf0) {
-- 
2.25.1

Re: [RFC PATCH 3/8] RFC: block: use transactions as a replacement of ->{can_}set_aio_context()

2022-07-14 Thread Hanna Reitz


On 12.07.22 23:19, Emanuele Giuseppe Esposito wrote:

-
RFC because I am not sure about the AioContext locks.
- Do we need to take the new AioContext lock? what does it protect?
- Taking the old AioContext lock is required now, because of
   bdrv_drained_begin calling AIO_WAIT_WHILE that unlocks the
   aiocontext. If we replace it with AIO_WAIT_WHILE_UNLOCKED,
   could we get rid of taking every time the old AioContext?
   drain would be enough to protect the graph modification.


It’s been a while, but as far as I remember (which may be wrong), the 
reason for how the locks are supposed to be taken was mostly that we 
need some defined state so that we know how to invoke 
bdrv_drained_begin() and bdrv_drained_end() (i.e. call the first one 
as-is, and switch the locks around for the latter one).


The idea of using _UNLOCKED sounds interesting, almost too obvious. I 
can’t see why that wouldn’t work, actually.



--

Simplify the way the aiocontext can be changed in a BDS graph.
There are currently two problems in bdrv_try_set_aio_context:
- There is a confusion of AioContext locks taken and released, because
   we assume that old aiocontext is always taken and new one is
   taken inside.


Yep, and that assumption is just broken in some cases, which is the main 
pain point I’m feeling with it right now.


For example, look at bdrv_attach_child_common(): Here, we attach a child 
to a parent, so we need to get them into a common AioContext. So first 
we try to put the child into the parent’s context, and if that fails, 
we’ll try the other way, putting the parent into the child’s context.


The problem is clear: The bdrv_try_set_aio_context() call requires us to 
hold the child’s current context but not the parent’s, and the 
child_class->set_aio_ctx() call requires the exact opposite.  But we 
never switch the context we have acquired, so this can’t both work.  
Better yet, nowhere is it defined what context a caller to 
bdrv_attach_child_common() will hold.


In practice, what happens here most of the time is that something will 
be moved from the main context to some other context, and since we’re in 
the main context already, that’ll just work.  But you can construct 
cases where something is attempted to be moved from an I/O thread into a 
different thread and then you’ll get a crash.


I’d be happy if we could do away with the requirement of having to hold 
any lock for changing a node’s AioContext.



- It doesn't look very safe to call bdrv_drained_begin while some
   nodes have already switched to the new aiocontext and others haven't.
   This could be especially dangerous because bdrv_drained_begin polls, so
   something else could be executed while graph is in an inconsistent
   state.

Additional minor nitpick: can_set and set_ callbacks both traverse the
graph, both using the ignored list of visited nodes in a different way.

Therefore, get rid of all of this and introduce a new callback,
change_aio_context, that uses transactions to efficiently, cleanly
and most importantly safely change the aiocontext of a graph.

This new callback is a "merge" of the two previous ones:
- Just like can_set_aio_context, recursively traverses the graph.
   Marks all nodes that are visited using a GList, and checks if
   they *could* change the aio_context.
- For each node that passes the above check, add a new transaction
   that implements a callback that effectively changes the aiocontext.
- If a node is a BDS, add two transactions: one taking care of draining
   the node at the beginning of the list (so that will be executed first)
   and one at the end taking care of changing the AioContext.
- Once done, the recursive function returns if *all* nodes can change
   the AioContext. If so, commit the above transactions. Otherwise don't
   do anything.
- The transaction list contains first all "drain" transactions, so
   we are sure we are draining all nodes in the same context, and then
   all the others, which switch the AioContext. In this way we make sure that
   bdrv_drained_begin() is always called under the old AioContext, and
   bdrv_drained_end() under the new one.
- Because of the above, we don't need to release and re-acquire the
   old AioContext every time, as everything is done once (and not
   per-node drain and aiocontext change).

Note that the "change" API is not yet invoked anywhere.


So the idea is that we introduce a completely new transaction-based API 
to change BDSs’ AioContext, and then drop the old one, right?



Signed-off-by: Emanuele Giuseppe Esposito 
---
  block.c| 197 +
  include/block/block-global-state.h |   9 ++
  include/block/block_int-common.h   |   3 +
  3 files changed, 209 insertions(+)

diff --git a/block.c b/block.c
index 267a39c0de..bda4e1bcef 100644
--- a/block.c
+++ b/block.c
@@ -7437,6 +7437,51 @@ static bool bdrv_parent_can_set_aio_context(BdrvChild 
*c, AioContext *ctx,
  return true;
  }

[PATCH 07/11] target/arm: Enable TTBCR_EAE for ARM_FEATURE_V8_R

2022-07-14 Thread Tobias Roehmel

From: Tobias Röhmel 

Enable TTBCR_EAE during reset since it's always set to 1 for Cortex-R52.
See ARM Architecture Reference Manual Supplement - ARMv8, for the ARMv8-R
AArch32 architecture profile Version:A.c section C1.2.

Signed-off-by: Tobias Röhmel 
---
 target/arm/cpu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 2c26a5387d..121fc2a819 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -261,6 +261,9 @@ static void arm_cpu_reset(DeviceState *dev)
 if (arm_feature(env, ARM_FEATURE_V8_R)) {
 env->cp15.rvbar = cpu->rvbar_prop;
 env->regs[15] = cpu->rvbar_prop;
+env->cp15.tcr_el[0].raw_tcr = TTBCR_EAE;
+env->cp15.tcr_el[1].raw_tcr = TTBCR_EAE;
+env->cp15.tcr_el[2].raw_tcr = TTBCR_EAE;
 }
 }
 
-- 
2.25.1

[PATCH 00/11] Add Cortex-R52

2022-07-14 Thread Tobias Roehmel

These patches add the ARM Cortex-R52. The biggest addition is
an implementation of the armv8-r MPU.
The last patch adds a machine that combines the new core with
a UART and a GIC. This machine can run many samples of
the Zephyr OS.

All information is taken from:
- ARM Cortex-R52 TRM revision r1p3
- ARM Architecture Reference Manual Supplement - ARMv8, for the ARMv8-R
  AArch32 architecture profile, Version A.c

Functionality that is not implemented:
- Changing between single and double precision floats
- Some hypervisor related functionality (HCR.T(R)VM,HADFSR,...)

Tobias Röhmel (11):
  target/arm: Add ARM_FEATURE_V8_R
  target/arm: Add ARM Cortex-R52 cpu
  target/arm: Add v8R MIDR register
  target/arm: Make RVBAR available for non AARCH64 CPUs
  target/arm: Make stage_2_format for cache attributes optional
  target/arm: Add ARMCacheAttrs to the signature of pmsav8_mpu_lookup
  target/arm: Enable TTBCR_EAE for ARM_FEATURE_V8_R
  target/arm Add PMSAv8r registers
  target/arm: Add PMSAv8r functionality
  target/arm: Make SPSR_hyp accessible for Cortex-R52
  hw/arm: Add R52 machine

 configs/devices/arm-softmmu/default.mak |   1 +
 hw/arm/Kconfig  |   5 +
 hw/arm/meson.build  |   1 +
 hw/arm/r52_machine.c| 133 +++
 hw/arm/r52_virt.c   | 217 
 include/hw/arm/r52_virt.h   |  61 +++
 target/arm/cpu.c|  10 +-
 target/arm/cpu.h|  11 ++
 target/arm/cpu_tcg.c|  54 ++
 target/arm/helper.c | 184 +++-
 target/arm/internals.h  |  13 +-
 target/arm/m_helper.c   |   3 +-
 target/arm/op_helper.c  |   8 +
 target/arm/ptw.c| 191 ++---
 target/arm/translate.c  |   5 +-
 15 files changed, 859 insertions(+), 38 deletions(-)
 create mode 100644 hw/arm/r52_machine.c
 create mode 100644 hw/arm/r52_virt.c
 create mode 100644 include/hw/arm/r52_virt.h

-- 
2.25.1

[PATCH 03/11] target/arm: Add v8R MIDR register

2022-07-14 Thread Tobias Roehmel

From: Tobias Röhmel 

This register is used by the ARM Cortex-R52.

Signed-off-by: Tobias Röhmel 
---
 target/arm/helper.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 6457e6301c..bdf1df37d5 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -8201,6 +8201,15 @@ void register_cp_regs_for_features(ARMCPU *cpu)
   .accessfn = access_aa64_tid1,
   .type = ARM_CP_CONST, .resetvalue = cpu->revidr },
 };
+ARMCPRegInfo id_v8r_midr_cp_reginfo[] = {
+{ .name = "MIDR",
+  .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = CP_ANY,
+  .access = PL1_R, .resetvalue = cpu->midr,
+  .writefn = arm_cp_write_ignore, .raw_writefn = raw_write,
+  .readfn = midr_read,
+  .fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid),
+  .type = ARM_CP_OVERRIDE },
+};
 ARMCPRegInfo id_cp_reginfo[] = {
 /* These are common to v8 and pre-v8 */
 { .name = "CTR",
@@ -8264,7 +8273,9 @@ void register_cp_regs_for_features(ARMCPU *cpu)
 id_mpuir_reginfo.access = PL1_RW;
 id_tlbtr_reginfo.access = PL1_RW;
 }
-if (arm_feature(env, ARM_FEATURE_V8)) {
+if (arm_feature(env, ARM_FEATURE_V8_R)) {
+define_arm_cp_regs(cpu, id_v8r_midr_cp_reginfo);
+} else if (arm_feature(env, ARM_FEATURE_V8)) {
 define_arm_cp_regs(cpu, id_v8_midr_cp_reginfo);
 } else {
 define_arm_cp_regs(cpu, id_pre_v8_midr_cp_reginfo);
-- 
2.25.1

[PATCH 11/11] hw/arm: Add R52 machine

2022-07-14 Thread Tobias Roehmel

From: Tobias Röhmel 

Signed-off-by: Tobias Röhmel 
---
 configs/devices/arm-softmmu/default.mak |   1 +
 hw/arm/Kconfig  |   5 +
 hw/arm/meson.build  |   1 +
 hw/arm/r52_machine.c| 133 +++
 hw/arm/r52_virt.c   | 217 
 include/hw/arm/r52_virt.h   |  61 +++
 6 files changed, 418 insertions(+)
 create mode 100644 hw/arm/r52_machine.c
 create mode 100644 hw/arm/r52_virt.c
 create mode 100644 include/hw/arm/r52_virt.h

diff --git a/configs/devices/arm-softmmu/default.mak 
b/configs/devices/arm-softmmu/default.mak
index 6985a25377..4df0844080 100644
--- a/configs/devices/arm-softmmu/default.mak
+++ b/configs/devices/arm-softmmu/default.mak
@@ -42,3 +42,4 @@ CONFIG_FSL_IMX6UL=y
 CONFIG_SEMIHOSTING=y
 CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y
 CONFIG_ALLWINNER_H3=y
+CONFIG_CORTEX_R52_VIRT=y
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 219262a8da..72ec0bb656 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -565,3 +565,8 @@ config ARMSSE
 select UNIMP
 select SSE_COUNTER
 select SSE_TIMER
+
+config CORTEX_R52_VIRT
+bool
+select ARM_GIC
+select PL011
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index 2d8381339c..2a0cdb9c83 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -43,6 +43,7 @@ arm_ss.add(when: 'CONFIG_STM32F100_SOC', if_true: 
files('stm32f100_soc.c'))
 arm_ss.add(when: 'CONFIG_STM32F205_SOC', if_true: files('stm32f205_soc.c'))
 arm_ss.add(when: 'CONFIG_STM32F405_SOC', if_true: files('stm32f405_soc.c'))
 arm_ss.add(when: 'CONFIG_XLNX_ZYNQMP_ARM', if_true: files('xlnx-zynqmp.c', 
'xlnx-zcu102.c'))
+arm_ss.add(when: 'CONFIG_CORTEX_R52_VIRT', if_true: files('r52_virt.c', 
'r52_machine.c'))
 arm_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files('xlnx-versal.c', 
'xlnx-versal-virt.c'))
 arm_ss.add(when: 'CONFIG_FSL_IMX25', if_true: files('fsl-imx25.c', 
'imx25_pdk.c'))
 arm_ss.add(when: 'CONFIG_FSL_IMX31', if_true: files('fsl-imx31.c', 'kzm.c'))
diff --git a/hw/arm/r52_machine.c b/hw/arm/r52_machine.c
new file mode 100644
index 00..33e9764793
--- /dev/null
+++ b/hw/arm/r52_machine.c
@@ -0,0 +1,133 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/arm/r52_virt.h"
+#include "hw/boards.h"
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qom/object.h"
+
+struct r52MachineState {
+MachineState parent_obj;
+
+ArmR52VirtState soc;
+
+bool secure;
+bool virt;
+
+struct arm_boot_info binfo;
+};
+
+#define TYPE_R52_MACHINE   MACHINE_TYPE_NAME("r52")
+OBJECT_DECLARE_SIMPLE_TYPE(r52MachineState, R52_MACHINE)
+
+
+static bool r52_get_secure(Object *obj, Error **errp)
+{
+r52MachineState *s = R52_MACHINE(obj);
+
+return s->secure;
+}
+
+static void r52_set_secure(Object *obj, bool value, Error **errp)
+{
+r52MachineState *s = R52_MACHINE(obj);
+
+s->secure = value;
+}
+
+static bool r52_get_virt(Object *obj, Error **errp)
+{
+r52MachineState *s = R52_MACHINE(obj);
+
+return s->virt;
+}
+
+static void r52_set_virt(Object *obj, bool value, Error **errp)
+{
+r52MachineState *s = R52_MACHINE(obj);
+
+s->virt = value;
+}
+
+static void r52_init(MachineState *machine)
+{
+r52MachineState *s = R52_MACHINE(machine);
+uint64_t ram_size = machine->ram_size;
+
+object_initialize_child(OBJECT(machine), "soc", >soc, TYPE_ARMR52VIRT);
+
+object_property_set_bool(OBJECT(>soc), "secure", s->secure,
+ _fatal);
+object_property_set_bool(OBJECT(>soc), "virtualization", s->virt,
+ _fatal);
+
+qdev_realize(DEVICE(>soc), NULL, _fatal);
+
+s->binfo.ram_size = ram_size;
+s->binfo.loader_start = 0;
+s->binfo.psci_conduit = QEMU_PSCI_CONDUIT_SMC;
+arm_load_kernel(s->soc.boot_cpu_ptr, machine, >binfo);
+}
+
+static void r52_machine_instance_init(Object *obj)
+{
+r52MachineState *s = R52_MACHINE(obj);
+
+/* Default to secure mode being disabled */
+s->secure = false;
+/* Default to virt (EL2) being enabled */
+s->virt = true;
+}
+
+static void r52_machine_class_init(ObjectClass *oc, void *data)
+{
+MachineClass *mc = MACHINE_CLASS(oc);
+
+mc->desc = "Cortex-R52 platform";
+mc->init = r52_init;
+mc->block_default_type = IF_IDE;
+mc->units_per_default_bus = 1;
+mc->ignore_memory_transaction_failures = true;
+mc->max_cpus =

[PATCH v2 18/19] vdpa: Add device migration blocker

2022-07-14 Thread Eugenio Pérez

Since the vhost-vdpa device is exposing _F_LOG, add a migration blocker if
it uses CVQ.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost-vdpa.h |  1 +
 hw/virtio/vhost-vdpa.c | 14 ++
 2 files changed, 15 insertions(+)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index d85643..d10a89303e 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -35,6 +35,7 @@ typedef struct vhost_vdpa {
 bool shadow_vqs_enabled;
 /* IOVA mapping used by the Shadow Virtqueue */
 VhostIOVATree *iova_tree;
+Error *migration_blocker;
 GPtrArray *shadow_vqs;
 const VhostShadowVirtqueueOps *shadow_vq_ops;
 void *shadow_vq_ops_opaque;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index beaaa7049a..795ed5a049 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -20,6 +20,7 @@
 #include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/vhost-vdpa.h"
 #include "exec/address-spaces.h"
+#include "migration/blocker.h"
 #include "qemu/cutils.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
@@ -1022,6 +1023,13 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
 return true;
 }
 
+if (v->migration_blocker) {
+int r = migrate_add_blocker(v->migration_blocker, );
+if (unlikely(r < 0)) {
+goto err_migration_blocker;
+}
+}
+
 for (i = 0; i < v->shadow_vqs->len; ++i) {
 VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
 VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
@@ -1064,6 +1072,9 @@ err:
 vhost_svq_stop(svq);
 }
 
+err_migration_blocker:
+error_reportf_err(err, "Cannot setup SVQ %u: ", i);
+
 return false;
 }
 
@@ -1083,6 +1094,9 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
 }
 }
 
+if (v->migration_blocker) {
+migrate_del_blocker(v->migration_blocker);
+}
 return true;
 }
 
-- 
2.31.1

[PATCH 02/11] target/arm: Add ARM Cortex-R52 cpu

2022-07-14 Thread Tobias Roehmel

From: Tobias Röhmel 

All constants are taken from the ARM Cortex-R52 Processor TRM Revision: r1p3

Signed-off-by: Tobias Röhmel 
---
 target/arm/cpu_tcg.c | 54 
 1 file changed, 54 insertions(+)

diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
index b751a19c8a..49fb03c09a 100644
--- a/target/arm/cpu_tcg.c
+++ b/target/arm/cpu_tcg.c
@@ -843,6 +843,59 @@ static void cortex_r5_initfn(Object *obj)
 define_arm_cp_regs(cpu, cortexr5_cp_reginfo);
 }
 
+static const ARMCPRegInfo cortexr52_cp_reginfo[] = {
+/* Dummy the TCM region regs for the moment */
+{ .name = "ATCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0,
+  .access = PL1_RW, .type = ARM_CP_CONST },
+{ .name = "BTCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1,
+  .access = PL1_RW, .type = ARM_CP_CONST },
+{ .name = "DCACHE_INVAL", .cp = 15, .opc1 = 0, .crn = 15, .crm = 5,
+  .opc2 = 0, .access = PL1_W, .type = ARM_CP_NOP },
+};
+
+static void cortex_r52_initfn(Object *obj)
+{
+ARMCPU *cpu = ARM_CPU(obj);
+
+set_feature(>env, ARM_FEATURE_V8);
+set_feature(>env, ARM_FEATURE_V8_R);
+set_feature(>env, ARM_FEATURE_EL2);
+set_feature(>env, ARM_FEATURE_PMSA);
+set_feature(>env, ARM_FEATURE_NEON);
+set_feature(>env, ARM_FEATURE_GENERIC_TIMER);
+cpu->midr = 0x411fd133; /* r1p3 */
+cpu->revidr = 0x;
+cpu->reset_fpsid = 0x41034023;
+cpu->isar.mvfr0 = 0x10110222;
+cpu->isar.mvfr1 = 0x1211;
+cpu->isar.mvfr2 = 0x0043;
+cpu->ctr = 0x8144c004;
+cpu->reset_sctlr = 0x30c50838;
+cpu->isar.id_pfr0 = 0x0131;
+cpu->isar.id_pfr1 = 0x10111001;
+cpu->isar.id_dfr0 = 0x03010006;
+cpu->id_afr0 = 0x;
+cpu->isar.id_mmfr0 = 0x00211040;
+cpu->isar.id_mmfr1 = 0x4000;
+cpu->isar.id_mmfr2 = 0x0120;
+cpu->isar.id_mmfr3 = 0xf0102211;
+cpu->isar.id_mmfr4 = 0x0010;
+cpu->isar.id_isar0 = 0x02101110;
+cpu->isar.id_isar1 = 0x13112111;
+cpu->isar.id_isar2 = 0x21232142;
+cpu->isar.id_isar3 = 0x01112131;
+cpu->isar.id_isar4 = 0x00010142;
+cpu->isar.id_isar5 = 0x00010001;
+cpu->isar.dbgdidr = 0x77168000;
+cpu->clidr = (1 << 27) | (1 << 24) | 0x3;
+cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */
+cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */
+
+cpu->pmsav7_dregion = 16;
+
+define_arm_cp_regs(cpu, cortexr52_cp_reginfo);
+}
+
 static void cortex_r5f_initfn(Object *obj)
 {
 ARMCPU *cpu = ARM_CPU(obj);
@@ -1148,6 +1201,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
 { .name = "cortex-m55",  .initfn = cortex_m55_initfn,
  .class_init = arm_v7m_class_init },
 { .name = "cortex-r5",   .initfn = cortex_r5_initfn },
+{ .name = "cortex-r52",   .initfn = cortex_r52_initfn },
 { .name = "cortex-r5f",  .initfn = cortex_r5f_initfn },
 { .name = "ti925t",  .initfn = ti925t_initfn },
 { .name = "sa1100",  .initfn = sa1100_initfn },
-- 
2.25.1

[PATCH v2 13/19] vhost: Add svq avail_handler callback

2022-07-14 Thread Eugenio Pérez

This allows external handlers to be aware of new buffers that the guest
places in the virtqueue.

When this callback is defined the ownership of the guest's virtqueue
element is transferred to the callback. This means that if the user
wants to forward the descriptor it needs to manually inject it. The
callback is also free to process the command by itself and use the
element with svq_push.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h | 31 +-
 hw/virtio/vhost-shadow-virtqueue.c | 14 --
 hw/virtio/vhost-vdpa.c |  3 ++-
 3 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index b5c6e3b3b4..965ca88706 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -25,6 +25,27 @@ typedef struct SVQElement {
 unsigned int ndescs;
 } SVQElement;
 
+typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
+
+/**
+ * Callback to handle an avail buffer.
+ *
+ * @svq:  Shadow virtqueue
+ * @elem:  Element placed in the queue by the guest
+ * @vq_callback_opaque:  Opaque
+ *
+ * Returns 0 if the vq is running as expected.
+ *
+ * Note that ownership of elem is transferred to the callback.
+ */
+typedef int (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq,
+  SVQElement *elem,
+  void *vq_callback_opaque);
+
+typedef struct VhostShadowVirtqueueOps {
+VirtQueueAvailCallback avail_handler;
+} VhostShadowVirtqueueOps;
+
 /* Shadow virtqueue to relay notifications */
 typedef struct VhostShadowVirtqueue {
 /* Shadow vring */
@@ -69,6 +90,12 @@ typedef struct VhostShadowVirtqueue {
  */
 uint16_t *desc_next;
 
+/* Caller callbacks */
+const VhostShadowVirtqueueOps *ops;
+
+/* Caller callbacks opaque */
+void *ops_opaque;
+
 /* Next head to expose to the device */
 uint16_t shadow_avail_idx;
 
@@ -102,7 +129,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, 
VirtIODevice *vdev,
  VirtQueue *vq);
 void vhost_svq_stop(VhostShadowVirtqueue *svq);
 
-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
+const VhostShadowVirtqueueOps *ops,
+void *ops_opaque);
 
 void vhost_svq_free(gpointer vq);
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 31a267f721..85b2d49326 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -306,7 +306,11 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 break;
 }
 
-r = vhost_svq_add_element(svq, elem);
+if (svq->ops) {
+r = svq->ops->avail_handler(svq, elem, svq->ops_opaque);
+} else {
+r = vhost_svq_add_element(svq, elem);
+}
 if (unlikely(r != 0)) {
 if (r == -ENOSPC) {
 /*
@@ -681,12 +685,16 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
  * shadow methods and file descriptors.
  *
  * @iova_tree: Tree to perform descriptors translations
+ * @ops: SVQ owner callbacks
+ * @ops_opaque: ops opaque pointer
  *
  * Returns the new virtqueue or NULL.
  *
  * In case of error, reason is reported through error_report.
  */
-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
+const VhostShadowVirtqueueOps *ops,
+void *ops_opaque)
 {
 g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
 int r;
@@ -708,6 +716,8 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree 
*iova_tree)
 event_notifier_init_fd(>svq_kick, VHOST_FILE_UNBIND);
 event_notifier_set_handler(>hdev_call, vhost_svq_handle_call);
 svq->iova_tree = iova_tree;
+svq->ops = ops;
+svq->ops_opaque = ops_opaque;
 return g_steal_pointer();
 
 err_init_hdev_call:
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 66f054a12c..0b13e98471 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -418,8 +418,9 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
struct vhost_vdpa *v,
 
 shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
 for (unsigned n = 0; n < hdev->nvqs; ++n) {
-g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
+g_autoptr(VhostShadowVirtqueue) svq;
 
+svq = vhost_svq_new(v->iova_tree, NULL, NULL);
 if (unlikely(!svq)) {
 error_setg(errp, "Cannot create svq %u", n);
 return -1;
-- 
2.31.1

[PATCH v2 19/19] vdpa: Add x-svq to NetdevVhostVDPAOptions

2022-07-14 Thread Eugenio Pérez

Finally offering the possibility to enable SVQ from the command line.

Signed-off-by: Eugenio Pérez 
Acked-by: Markus Armbruster 
---
 qapi/net.json|  9 +-
 net/vhost-vdpa.c | 72 ++--
 2 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/qapi/net.json b/qapi/net.json
index 9af11e9a3b..75ba2cb989 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -445,12 +445,19 @@
 # @queues: number of queues to be created for multiqueue vhost-vdpa
 #  (default: 1)
 #
+# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1)
+# (default: false)
+#
+# Features:
+# @unstable: Member @x-svq is experimental.
+#
 # Since: 5.1
 ##
 { 'struct': 'NetdevVhostVDPAOptions',
   'data': {
 '*vhostdev': 'str',
-'*queues':   'int' } }
+'*queues':   'int',
+'*x-svq':{'type': 'bool', 'features' : [ 'unstable'] } } }
 
 ##
 # @NetdevVmnetHostOptions:
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 7ccf9eaf4d..85148a5114 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -75,6 +75,28 @@ const int vdpa_feature_bits[] = {
 VHOST_INVALID_FEATURE_BIT
 };
 
+/** Supported device specific feature bits with SVQ */
+static const uint64_t vdpa_svq_device_features =
+BIT_ULL(VIRTIO_NET_F_CSUM) |
+BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
+BIT_ULL(VIRTIO_NET_F_MTU) |
+BIT_ULL(VIRTIO_NET_F_MAC) |
+BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
+BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
+BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
+BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
+BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
+BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
+BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
+BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
+BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
+BIT_ULL(VIRTIO_NET_F_STATUS) |
+BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
+BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
+BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
+BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
+BIT_ULL(VIRTIO_NET_F_STANDBY);
+
 VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
@@ -133,9 +155,13 @@ err_init:
 static void vhost_vdpa_cleanup(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+struct vhost_dev *dev = >vhost_net->dev;
 
 qemu_vfree(s->cvq_cmd_out_buffer);
 qemu_vfree(s->cvq_cmd_in_buffer);
+if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
+g_clear_pointer(>vhost_vdpa.iova_tree, vhost_iova_tree_delete);
+}
 if (s->vhost_net) {
 vhost_net_cleanup(s->vhost_net);
 g_free(s->vhost_net);
@@ -437,7 +463,9 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
int vdpa_device_fd,
int queue_pair_index,
int nvqs,
-   bool is_datapath)
+   bool is_datapath,
+   bool svq,
+   VhostIOVATree *iova_tree)
 {
 NetClientState *nc = NULL;
 VhostVDPAState *s;
@@ -455,6 +483,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 
 s->vhost_vdpa.device_fd = vdpa_device_fd;
 s->vhost_vdpa.index = queue_pair_index;
+s->vhost_vdpa.shadow_vqs_enabled = svq;
+s->vhost_vdpa.iova_tree = iova_tree;
 if (!is_datapath) {
 s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
 vhost_vdpa_net_cvq_cmd_page_len());
@@ -465,6 +495,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 
 s->vhost_vdpa.shadow_vq_ops = _vdpa_net_svq_ops;
 s->vhost_vdpa.shadow_vq_ops_opaque = s;
+error_setg(>vhost_vdpa.migration_blocker,
+   "Migration disabled: vhost-vdpa uses CVQ.");
 }
 ret = vhost_vdpa_add(nc, (void *)>vhost_vdpa, queue_pair_index, nvqs);
 if (ret) {
@@ -474,6 +506,14 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 return nc;
 }
 
+static int vhost_vdpa_get_iova_range(int fd,
+ struct vhost_vdpa_iova_range *iova_range)
+{
+int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
+
+return ret < 0 ? -errno : 0;
+}
+
 static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
 {
 int ret = ioctl(fd, VHOST_GET_FEATURES, features);
@@ -524,6 +564,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 uint64_t features;
 int vdpa_device_fd;
 g_autofree NetClientState **ncs = NULL;
+g_autoptr(VhostIOVATree) iova_tree = NULL;
 NetClientState *nc;
 int queue_pairs, r, i, has_cvq = 0;
 
@@ -551,22 +592,45 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 return queue_pairs;
 }
 
+if (opts->x_svq) {
+

[PATCH 08/11] target/arm Add PMSAv8r registers

2022-07-14 Thread Tobias Roehmel

From: Tobias Röhmel 

Signed-off-by: Tobias Röhmel 
---
 target/arm/cpu.h|  10 +++
 target/arm/helper.c | 171 
 2 files changed, 181 insertions(+)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 86e06116a9..632d0d13c6 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -726,8 +726,18 @@ typedef struct CPUArchState {
  */
 uint32_t *rbar[M_REG_NUM_BANKS];
 uint32_t *rlar[M_REG_NUM_BANKS];
+uint32_t prbarn[255];
+uint32_t prlarn[255];
+uint32_t hprbarn[255];
+uint32_t hprlarn[255];
 uint32_t mair0[M_REG_NUM_BANKS];
 uint32_t mair1[M_REG_NUM_BANKS];
+uint32_t prbar;
+uint32_t prlar;
+uint32_t prselr;
+uint32_t hprbar;
+uint32_t hprlar;
+uint32_t hprselr;
 } pmsav8;
 
 /* v8M SAU */
diff --git a/target/arm/helper.c b/target/arm/helper.c
index bdf1df37d5..adbf282d00 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -7422,6 +7422,78 @@ static CPAccessResult access_joscr_jmcr(CPUARMState *env,
 return CP_ACCESS_OK;
 }
 
+static void prbar_write(CPUARMState *env, const ARMCPRegInfo *ri,
+  uint64_t value)
+{
+env->pmsav8.prbarn[env->pmsav8.prselr] = value;
+}
+
+static void prlar_write(CPUARMState *env, const ARMCPRegInfo *ri,
+  uint64_t value)
+{
+env->pmsav8.prlarn[env->pmsav8.prselr] = value;
+}
+
+static uint64_t prbar_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+return env->pmsav8.prbarn[env->pmsav8.prselr];
+}
+
+static uint64_t prlar_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+return env->pmsav8.prlarn[env->pmsav8.prselr];
+}
+
+static void hprbar_write(CPUARMState *env, const ARMCPRegInfo *ri,
+  uint64_t value)
+{
+env->pmsav8.hprbarn[env->pmsav8.hprselr] = value;
+}
+
+static void hprlar_write(CPUARMState *env, const ARMCPRegInfo *ri,
+  uint64_t value)
+{
+env->pmsav8.hprlarn[env->pmsav8.hprselr] = value;
+}
+
+static void hprenr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+  uint64_t value)
+{
+uint32_t n;
+ARMCPU *cpu = env_archcpu(env);
+for (n = 0; n < (int)cpu->pmsav7_dregion; ++n) {
+if (value & (1 << n)) {
+env->pmsav8.hprlarn[n] |= 0x1;
+} else {
+env->pmsav8.hprlarn[n] &= (~0x1);
+}
+}
+}
+
+static uint64_t hprbar_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+return env->pmsav8.hprbarn[env->pmsav8.hprselr];
+}
+
+static uint64_t hprlar_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+return env->pmsav8.hprlarn[env->pmsav8.hprselr];
+}
+
+static uint64_t hprenr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+uint32_t n;
+uint32_t result = 0x0;
+ARMCPU *cpu = env_archcpu(env);
+
+for (n = 0; n < (int)cpu->pmsav7_dregion; ++n) {
+if (env->pmsav8.hprlarn[n] & 0x1) {
+result |= (0x1 << n);
+}
+}
+return result;
+}
+
 static const ARMCPRegInfo jazelle_regs[] = {
 { .name = "JIDR",
   .cp = 14, .crn = 0, .crm = 0, .opc1 = 7, .opc2 = 0,
@@ -8242,6 +8314,46 @@ void register_cp_regs_for_features(ARMCPU *cpu)
   .access = PL1_R, .type = ARM_CP_CONST,
   .resetvalue = cpu->pmsav7_dregion << 8
 };
+/* PMSAv8-R registers*/
+ARMCPRegInfo id_pmsav8_r_reginfo[] = {
+{ .name = "HMPUIR",
+  .cp = 15, .crn = 0, .crm = 0, .opc1 = 4, .opc2 = 4,
+  .access = PL2_R, .type = ARM_CP_CONST,
+  .resetvalue = cpu->pmsav7_dregion},
+ /* PMSAv8-R registers */
+{ .name = "PRBAR",
+  .cp = 15, .opc1 = 0, .crn = 6, .crm = 3, .opc2 = 0,
+  .access = PL1_RW, .resetvalue = 0,
+  .readfn = prbar_read, .writefn = prbar_write,
+  .fieldoffset = offsetof(CPUARMState, pmsav8.prbar)},
+{ .name = "PRLAR",
+  .cp = 15, .opc1 = 0, .crn = 6, .crm = 3, .opc2 = 1,
+  .access = PL1_RW, .resetvalue = 0,
+  .readfn = prlar_read, .writefn = prlar_write,
+  .fieldoffset = offsetof(CPUARMState, pmsav8.prlar)},
+{ .name = "PRSELR", .resetvalue = 0,
+  .cp = 15, .opc1 = 0, .crn = 6, .crm = 2, .opc2 = 1,
+  .access = PL1_RW, .accessfn = access_tvm_trvm,
+  .fieldoffset = offsetof(CPUARMState, pmsav8.prselr)},
+{ .name = "HPRBAR", .resetvalue = 0,
+  .readfn = hprbar_read, .writefn = hprbar_write,
+  .cp = 15, .opc1 = 4, .crn = 6, .crm = 3, .opc2 = 0,
+  .access = PL2_RW, .resetvalue = 0,
+  .fieldoffset = offsetof(CPUARMState, pmsav8.hprbar)},
+{ .name = "HPRLAR",
+  .cp = 15, .opc1 = 4, .crn = 6, .crm = 3, .opc2 = 1,
+  .access = PL2_RW, .resetvalue = 0,
+  .readfn =

[PATCH v2 15/19] vdpa: manual forward CVQ buffers

2022-07-14 Thread Eugenio Pérez

Do a simple forwarding of CVQ buffers, the same work SVQ could do but
through callbacks. No functional change intended.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost-vdpa.h |  3 ++
 hw/virtio/vhost-vdpa.c |  3 +-
 net/vhost-vdpa.c   | 58 ++
 3 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 7214eb47dc..d85643 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -15,6 +15,7 @@
 #include 
 
 #include "hw/virtio/vhost-iova-tree.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"
 
@@ -35,6 +36,8 @@ typedef struct vhost_vdpa {
 /* IOVA mapping used by the Shadow Virtqueue */
 VhostIOVATree *iova_tree;
 GPtrArray *shadow_vqs;
+const VhostShadowVirtqueueOps *shadow_vq_ops;
+void *shadow_vq_ops_opaque;
 struct vhost_dev *dev;
 VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 96997210be..beaaa7049a 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -419,7 +419,8 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
struct vhost_vdpa *v,
 for (unsigned n = 0; n < hdev->nvqs; ++n) {
 g_autoptr(VhostShadowVirtqueue) svq;
 
-svq = vhost_svq_new(v->iova_tree, NULL, NULL);
+svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
+v->shadow_vq_ops_opaque);
 if (unlikely(!svq)) {
 error_setg(errp, "Cannot create svq %u", n);
 return -1;
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index df1e69ee72..805c9dd6b6 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -11,11 +11,14 @@
 
 #include "qemu/osdep.h"
 #include "clients.h"
+#include "hw/virtio/virtio-net.h"
 #include "net/vhost_net.h"
 #include "net/vhost-vdpa.h"
 #include "hw/virtio/vhost-vdpa.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/memalign.h"
 #include "qemu/option.h"
 #include "qapi/error.h"
 #include 
@@ -187,6 +190,57 @@ static NetClientInfo net_vhost_vdpa_info = {
 .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
+/**
+ * Forward buffer for the moment.
+ */
+static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
+SVQElement *svq_elem, void *opaque)
+{
+VirtQueueElement *elem = _elem->elem;
+unsigned int n = elem->out_num + elem->in_num;
+g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
+size_t in_len, dev_written;
+virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+int r;
+
+memcpy(dev_buffers, elem->out_sg, elem->out_num);
+memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
+
+r = vhost_svq_add(svq, _buffers[0], elem->out_num, _buffers[1],
+  elem->in_num, svq_elem);
+if (unlikely(r != 0)) {
+if (unlikely(r == -ENOSPC)) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
+  __func__);
+}
+goto out;
+}
+
+/*
+ * We can poll here since we've had BQL from the time we sent the
+ * descriptor. Also, we need to take the answer before SVQ pulls by itself,
+ * when BQL is released
+ */
+dev_written = vhost_svq_poll(svq);
+if (unlikely(dev_written < sizeof(status))) {
+error_report("Insufficient written data (%zu)", dev_written);
+}
+
+out:
+in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, ,
+  sizeof(status));
+if (unlikely(in_len < sizeof(status))) {
+error_report("Bad device CVQ written length");
+}
+vhost_svq_push_elem(svq, svq_elem, MIN(in_len, sizeof(status)));
+g_free(svq_elem);
+return r;
+}
+
+static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
+.avail_handler = vhost_vdpa_net_handle_ctrl_avail,
+};
+
 static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
const char *device,
const char *name,
@@ -211,6 +265,10 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 
 s->vhost_vdpa.device_fd = vdpa_device_fd;
 s->vhost_vdpa.index = queue_pair_index;
+if (!is_datapath) {
+s->vhost_vdpa.shadow_vq_ops = _vdpa_net_svq_ops;
+s->vhost_vdpa.shadow_vq_ops_opaque = s;
+}
 ret = vhost_vdpa_add(nc, (void *)>vhost_vdpa, queue_pair_index, nvqs);
 if (ret) {
 qemu_del_net_client(nc);
-- 
2.31.1

[PATCH v2 12/19] vhost: add vhost_svq_poll

2022-07-14 Thread Eugenio Pérez

It allows the Shadow Control VirtQueue to wait for the device to use the
available buffers.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  1 +
 hw/virtio/vhost-shadow-virtqueue.c | 22 ++
 2 files changed, 23 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 1692541cbb..b5c6e3b3b4 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -89,6 +89,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq, const 
SVQElement *elem,
 int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
   size_t out_num, const struct iovec *in_sg, size_t in_num,
   SVQElement *elem);
+size_t vhost_svq_poll(VhostShadowVirtqueue *svq);
 
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 5244896358..31a267f721 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -486,6 +486,28 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 } while (!vhost_svq_enable_notification(svq));
 }
 
+/**
+ * Poll the SVQ for one device used buffer.
+ *
+ * This function races with main event loop SVQ polling, so extra
+ * synchronization is needed.
+ *
+ * Return the length written by the device.
+ */
+size_t vhost_svq_poll(VhostShadowVirtqueue *svq)
+{
+do {
+uint32_t len;
+SVQElement *elem = vhost_svq_get_buf(svq, );
+if (elem) {
+return len;
+}
+
+/* Make sure we read new used_idx */
+smp_rmb();
+} while (true);
+}
+
 /**
  * Forward used buffers.
  *
-- 
2.31.1

[PATCH v2 16/19] vdpa: Buffer CVQ support on shadow virtqueue

2022-07-14 Thread Eugenio Pérez

Introduce the control virtqueue support for vDPA shadow virtqueue. This
is needed for advanced networking features like rx filtering.

Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid
TOCTOU with the guest's or device's memory every time there is a device
model change.  Otherwise, the guest could change the memory content in
the time between qemu reading it and the device reading it.

To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is
implemented.  If the virtio-net driver changes the MAC address, the
virtio-net device model will be updated with the new one, and an rx
filtering change event will be raised.

More cvq commands could be added here straightforwardly but they have
not been tested.

Signed-off-by: Eugenio Pérez 
---
 net/vhost-vdpa.c | 211 +--
 1 file changed, 204 insertions(+), 7 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 805c9dd6b6..bc115a1455 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -33,6 +33,9 @@ typedef struct VhostVDPAState {
 NetClientState nc;
 struct vhost_vdpa vhost_vdpa;
 VHostNetState *vhost_net;
+
+/* Control commands shadow buffers */
+void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
 bool started;
 } VhostVDPAState;
 
@@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
 
+qemu_vfree(s->cvq_cmd_out_buffer);
+qemu_vfree(s->cvq_cmd_in_buffer);
 if (s->vhost_net) {
 vhost_net_cleanup(s->vhost_net);
 g_free(s->vhost_net);
@@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = {
 .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
+static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
+{
+VhostIOVATree *tree = v->iova_tree;
+DMAMap needle = {
+/*
+ * No need to specify size or to look for more translations since
+ * this contiguous chunk was allocated by us.
+ */
+.translated_addr = (hwaddr)(uintptr_t)addr,
+};
+const DMAMap *map = vhost_iova_tree_find_iova(tree, );
+int r;
+
+if (unlikely(!map)) {
+error_report("Cannot locate expected map");
+return;
+}
+
+r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
+if (unlikely(r != 0)) {
+error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
+}
+
+vhost_iova_tree_remove(tree, map);
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_len(void)
+{
+/*
+ * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
+ * In buffer is always 1 byte, so it should fit here
+ */
+return sizeof(struct virtio_net_ctrl_hdr) +
+   2 * sizeof(struct virtio_net_ctrl_mac) +
+   MAC_TABLE_ENTRIES * ETH_ALEN;
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
+{
+return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
+}
+
+/** Copy and map a guest buffer. */
+static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
+   const struct iovec *out_data,
+   size_t out_num, size_t data_len, void *buf,
+   size_t *written, bool write)
+{
+DMAMap map = {};
+int r;
+
+if (unlikely(!data_len)) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n",
+  __func__, write ? "in" : "out");
+return false;
+}
+
+*written = iov_to_buf(out_data, out_num, 0, buf, data_len);
+map.translated_addr = (hwaddr)(uintptr_t)buf;
+map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
+map.perm = write ? IOMMU_RW : IOMMU_RO,
+r = vhost_iova_tree_map_alloc(v->iova_tree, );
+if (unlikely(r != IOVA_OK)) {
+error_report("Cannot map injected element");
+return false;
+}
+
+r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
+   !write);
+if (unlikely(r < 0)) {
+goto dma_map_err;
+}
+
+return true;
+
+dma_map_err:
+vhost_iova_tree_remove(v->iova_tree, );
+return false;
+}
+
 /**
- * Forward buffer for the moment.
+ * Copy the guest element into a dedicated buffer suitable to be sent to NIC
+ *
+ * @iov: [0] is the out buffer, [1] is the in one
+ */
+static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
+VirtQueueElement *elem,
+struct iovec *iov)
+{
+size_t in_copied;
+bool ok;
+
+iov[0].iov_base = s->cvq_cmd_out_buffer;
+ok = vhost_vdpa_cvq_map_buf(>vhost_vdpa, elem->out_sg, elem->out_num,
+vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
+[0].iov_len, false);
+if (unlikely(!ok)) {
+return false;
+}
+
+iov[1].iov_base = s->cvq_cmd_in_buffer;
+ok = vhost_vdpa_cvq_map_buf(>vhost_vdpa, NULL, 0,
+

[PATCH v2 17/19] vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs

2022-07-14 Thread Eugenio Pérez

Knowing the device features is needed for CVQ SVQ, so SVQ knows whether
it can handle all commands. Extract the feature query from
vhost_vdpa_get_max_queue_pairs so we can reuse it.

Signed-off-by: Eugenio Pérez 
Acked-by: Jason Wang 
---
 net/vhost-vdpa.c | 30 --
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index bc115a1455..7ccf9eaf4d 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -474,20 +474,24 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 return nc;
 }
 
-static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp)
+static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
+{
+int ret = ioctl(fd, VHOST_GET_FEATURES, features);
+if (unlikely(ret < 0)) {
+error_setg_errno(errp, errno,
+ "Fail to query features from vhost-vDPA device");
+}
+return ret;
+}
+
+static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
+  int *has_cvq, Error **errp)
 {
 unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
 g_autofree struct vhost_vdpa_config *config = NULL;
 __virtio16 *max_queue_pairs;
-uint64_t features;
 int ret;
 
-ret = ioctl(fd, VHOST_GET_FEATURES, );
-if (ret) {
-error_setg(errp, "Fail to query features from vhost-vDPA device");
-return ret;
-}
-
 if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
 *has_cvq = 1;
 } else {
@@ -517,10 +521,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 NetClientState *peer, Error **errp)
 {
 const NetdevVhostVDPAOptions *opts;
+uint64_t features;
 int vdpa_device_fd;
 g_autofree NetClientState **ncs = NULL;
 NetClientState *nc;
-int queue_pairs, i, has_cvq = 0;
+int queue_pairs, r, i, has_cvq = 0;
 
 assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 opts = >u.vhost_vdpa;
@@ -534,7 +539,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 return -errno;
 }
 
-queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd,
+r = vhost_vdpa_get_features(vdpa_device_fd, , errp);
+if (unlikely(r < 0)) {
+return r;
+}
+
+queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
  _cvq, errp);
 if (queue_pairs < 0) {
 qemu_close(vdpa_device_fd);
-- 
2.31.1

[PATCH v2 09/19] vhost: Track number of descs in SVQElement

2022-07-14 Thread Eugenio Pérez

Since CVQ will be able to modify elements, the number of descriptors in
the guest may not match the number of descriptors exposed to the device.
Track the latter separately.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  6 ++
 hw/virtio/vhost-shadow-virtqueue.c | 10 +-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index f35d4b8f90..143c86a568 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -17,6 +17,12 @@
 
 typedef struct SVQElement {
 VirtQueueElement elem;
+
+/*
+ * Number of descriptors exposed to the device. May or may not match
+ * guest's
+ */
+unsigned int ndescs;
 } SVQElement;
 
 /* Shadow virtqueue to relay notifications */
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 442ca3cbd3..3b112c4ec8 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -243,10 +243,10 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const 
struct iovec *out_sg,
   size_t in_num, SVQElement *svq_elem)
 {
 unsigned qemu_head;
-unsigned ndescs = in_num + out_num;
+svq_elem->ndescs = in_num + out_num;
 bool ok;
 
-if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
+if (unlikely(svq_elem->ndescs > vhost_svq_available_slots(svq))) {
 return -ENOSPC;
 }
 
@@ -393,7 +393,7 @@ static SVQElement *vhost_svq_get_buf(VhostShadowVirtqueue 
*svq,
 SVQElement *elem;
 const vring_used_t *used = svq->vring.used;
 vring_used_elem_t used_elem;
-uint16_t last_used, last_used_chain, num;
+uint16_t last_used, last_used_chain;
 
 if (!vhost_svq_more_used(svq)) {
 return NULL;
@@ -420,8 +420,8 @@ static SVQElement *vhost_svq_get_buf(VhostShadowVirtqueue 
*svq,
 }
 
 elem = svq->ring_id_maps[used_elem.id];
-num = elem->elem.in_num + elem->elem.out_num;
-last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
+last_used_chain = vhost_svq_last_desc_of_chain(svq, elem->ndescs,
+   used_elem.id);
 svq->desc_next[last_used_chain] = svq->free_head;
 svq->free_head = used_elem.id;
 
-- 
2.31.1

[PATCH v2 14/19] vdpa: Export vhost_vdpa_dma_map and unmap calls

2022-07-14 Thread Eugenio Pérez

Shadow CVQ will copy buffers to qemu's VA, so we avoid TOCTOU attacks in
which the guest could leave the qemu device model and the vdpa device in
different states.

To do so, it needs to be able to map these new buffers to the device.

Signed-off-by: Eugenio Pérez 
Acked-by: Jason Wang 
---
 include/hw/virtio/vhost-vdpa.h | 4 
 hw/virtio/vhost-vdpa.c | 7 +++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index a29dbb3f53..7214eb47dc 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -39,4 +39,8 @@ typedef struct vhost_vdpa {
 VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
 
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+   void *vaddr, bool readonly);
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size);
+
 #endif
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 0b13e98471..96997210be 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -71,8 +71,8 @@ static bool 
vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
 return false;
 }
 
-static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
-  void *vaddr, bool readonly)
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+   void *vaddr, bool readonly)
 {
 struct vhost_msg_v2 msg = {};
 int fd = v->device_fd;
@@ -97,8 +97,7 @@ static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr 
iova, hwaddr size,
 return ret;
 }
 
-static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
-hwaddr size)
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size)
 {
 struct vhost_msg_v2 msg = {};
 int fd = v->device_fd;
-- 
2.31.1

1 2 3 >

1 - 100 of 250 matches

Mail list logo