[PATCH] acpi/tests/avocado/bits: don't remove the work directory when V is in env

2022-11-16 Thread Ani Sinha
Debugging bits issues often involves running the QEMU command line manually
outside of the avocado environment with the generated ISO. Hence, it's
inconvenient if the iso gets cleaned up after the test has finished. This change
makes sure that the work directory is kept after the test finishes if the test
is run with V=1 in the environment so that the iso is available for use with the
QEMU command line.

CC: Michael S. Tsirkin 
Signed-off-by: Ani Sinha 
---
 tests/avocado/acpi-bits.py | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/avocado/acpi-bits.py b/tests/avocado/acpi-bits.py
index 8745a58a76..7657343f2a 100644
--- a/tests/avocado/acpi-bits.py
+++ b/tests/avocado/acpi-bits.py
@@ -354,7 +354,11 @@ def tearDown(self):
 if self._vm:
 self.assertFalse(not self._vm.is_running)
 self.logger.info('removing the work directory %s', self._workDir)
-shutil.rmtree(self._workDir)
+if not os.getenv('V'):
+shutil.rmtree(self._workDir)
+else:
+self.logger.info('not removing the work directory %s as V is ' \
+ 'passed in the environment', self._workDir)
 super().tearDown()
 
 def test_acpi_smbios_bits(self):
-- 
2.34.1




Re: [PATCH for 8.0 v7 10/10] vdpa: Always start CVQ in SVQ mode if possible

2022-11-16 Thread Eugenio Perez Martin
On Thu, Nov 17, 2022 at 7:52 AM Jason Wang  wrote:
>
>
> 在 2022/11/16 23:05, Eugenio Pérez 写道:
> > Isolate control virtqueue in its own group, allowing to intercept control
> > commands but letting dataplane run totally passthrough to the guest.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> > v7:
> > * Never ask for number of address spaces, just react if isolation is not
> >possible.
> > * Return ASID ioctl errors instead of masking them as if the device has
> >no asid.
> > * Simplify net_init_vhost_vdpa logic
> > * Add "if possible" suffix
> >
> > v6:
> > * Disable control SVQ if the device does not support it because of
> > features.
> >
> > v5:
> > * Fixing the not adding cvq buffers when x-svq=on is specified.
> > * Move vring state in vhost_vdpa_get_vring_group instead of using a
> >parameter.
> > * Rename VHOST_VDPA_NET_CVQ_PASSTHROUGH to VHOST_VDPA_NET_DATA_ASID
> >
> > v4:
> > * Squash vhost_vdpa_cvq_group_is_independent.
> > * Rebased on last CVQ start series, that allocated CVQ cmd bufs at load
> > * Do not check for cvq index on vhost_vdpa_net_prepare, we only have one
> >that callback registered in that NetClientInfo.
> >
> > v3:
> > * Make asid related queries print a warning instead of returning an
> >error and stop the start of qemu.
> > ---
> >   hw/virtio/vhost-vdpa.c |   3 +-
> >   net/vhost-vdpa.c   | 117 +++--
> >   2 files changed, 114 insertions(+), 6 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index 852baf8b2c..a29a18a6a9 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -653,7 +653,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev 
> > *dev)
> >   {
> >   uint64_t features;
> >   uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
> > -0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
> > +0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
> > +0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
> >   int r;
> >
> > >   if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index a9c864741a..dc13a49311 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -101,6 +101,8 @@ static const uint64_t vdpa_svq_device_features =
> >   BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
> >   BIT_ULL(VIRTIO_NET_F_STANDBY);
> >
> > +#define VHOST_VDPA_NET_CVQ_ASID 1
> > +
> >   VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> >   {
> >   VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > @@ -242,6 +244,40 @@ static NetClientInfo net_vhost_vdpa_info = {
> >   .check_peer_type = vhost_vdpa_check_peer_type,
> >   };
> >
> > +static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
> > +{
> > +struct vhost_vring_state state = {
> > +.index = vq_index,
> > +};
> > > +int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);
> > +
> > +if (unlikely(r < 0)) {
> > +error_report("Cannot get VQ %u group: %s", vq_index,
> > + g_strerror(errno));
> > +return r;
> > +}
> > +
> > +return state.num;
> > +}
> > +
> > +static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
> > +   unsigned vq_group,
> > +   unsigned asid_num)
> > +{
> > +struct vhost_vring_state asid = {
> > +.index = vq_group,
> > +.num = asid_num,
> > +};
> > +int r;
> > +
> > > +r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid);
> > +if (unlikely(r < 0)) {
> > +error_report("Can't set vq group %u asid %u, errno=%d (%s)",
> > + asid.index, asid.num, errno, g_strerror(errno));
> > +}
> > +return r;
> > +}
> > +
> >   static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
> >   {
> >   VhostIOVATree *tree = v->iova_tree;
> > @@ -316,11 +352,69 @@ dma_map_err:
> >   static int vhost_vdpa_net_cvq_start(NetClientState *nc)
> >   {
> >   VhostVDPAState *s;
> > -int r;
> > +struct vhost_vdpa *v;
> > +uint64_t backend_features;
> > +int64_t cvq_group;
> > +int cvq_index, r;
> >
> >   assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> >
> >   s = DO_UPCAST(VhostVDPAState, nc, nc);
> > > +v = &s->vhost_vdpa;
> > +
> > +v->shadow_data = s->always_svq;
> > +v->shadow_vqs_enabled = s->always_svq;
> > +s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
> > +
> > +if (s->always_svq) {
> > +goto out;
> > +}
> > +
> > +/* Backend features are not available in v->dev yet. */
> > > +r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
> > +if (unlikely(r < 0)) {
> > +error_report("Cannot get vdpa backend_features: %s(%d)",
> > +g_strerror(errno), errno);
> > +return -1;
> > +}
> > +if (!(backend_features & 

Re: [PATCH] tests/avocado: configure acpi-bits to use avocado timeout

2022-11-16 Thread Ani Sinha
On Thu, Nov 17, 2022 at 5:24 AM Ani Sinha  wrote:
>
>
>
> On Wed, Nov 16, 2022 at 11:31 PM John Snow  wrote:
>>
>>
>>
>> On Tue, Nov 15, 2022, 10:24 PM Ani Sinha  wrote:
>>>
>>> On Wed, Nov 16, 2022 at 2:58 AM John Snow  wrote:
>>> >
>>> > Instead of using a hardcoded timeout, just rely on Avocado's built-in
>>> > test case timeout. This helps avoid timeout issues on machines where 60
>>> > seconds is not sufficient.
>>> >
>>> > Signed-off-by: John Snow 
>>>
>>> Reviewed-by: Ani Sinha 
>>
>>
>> Alex's critique is valid, though: the way vm.wait() works is to immediately  
>> terminate the serial console connection as it prepares for the VM to shut 
>> down. I forgot about this.
>>
>> (For historical reasons, it does this to avoid deadlocks when the pipe 
>> fills.)
>>
>> I think we definitely do want to make sure we watch the console *while* we 
>> wait for it to shut down, which is not a feature QEMUMachine really offers 
>> right now in a meaningful way.
>
>
> Maybe we can push your current patch while we consider these console logging 
> enhancements for the next release window. Console logging would require some 
> changes in bits and some more testing. I'm not sure if I'll have time for it 
> immediately at present.
>
>>
>> I need to make some more drastic changes to machine.py, but in the meantime 
>> I can revise this patch to do something a bit smarter so we get console 
>> logging while we wait. This is a use case worth supporting.
>>
>> (Thanks for writing new and interesting tests!)

Spoke to John on IRC. Seems this patch using vm.wait() is safe for
this release as I do not use the console o/p in the test and do not
call vm.set_console().
When we enable the console output, some additional work will need to
be done for the QemuMachine library to make sure we avoid races when
we call vm.wait() with _early_cleanup().

>>
>>>
>>> > ---
>>> >  tests/avocado/acpi-bits.py | 10 ++
>>> >  1 file changed, 2 insertions(+), 8 deletions(-)
>>> >
>>> > diff --git a/tests/avocado/acpi-bits.py b/tests/avocado/acpi-bits.py
>>> > index 8745a58a766..ac13e22dc93 100644
>>> > --- a/tests/avocado/acpi-bits.py
>>> > +++ b/tests/avocado/acpi-bits.py
>>> > @@ -385,12 +385,6 @@ def test_acpi_smbios_bits(self):
>>> >  self._vm.launch()
>>> >  # biosbits has been configured to run all the specified test 
>>> > suites
>>> >  # in batch mode and then automatically initiate a vm shutdown.
>>> > -# sleep for maximum of one minute
>>> > -max_sleep_time = time.monotonic() + 60
>>> > -while self._vm.is_running() and time.monotonic() < 
>>> > max_sleep_time:
>>> > -time.sleep(1)
>>> > -
>>> > -self.assertFalse(time.monotonic() > max_sleep_time,
>>> > - 'The VM seems to have failed to shutdown in 
>>> > time')
>>> > -
>>> > +# Rely on avocado's unit test timeout.
>>> > +self._vm.wait(timeout=None)
>>>
>>> I think this is fine. This just waits until the VM is shutdown on its
>>> own and relies on the avocado framework to timeout if it doesn't. We
>>> do not need to look into the console. The test issues a shutdown from
>>> the VM itself once its done with the batch operations.
>>
>>
>> Still, if it fails, we want to see the output, right? It's very frustrating 
>> if it doesn't, especially in an automated pipeline.
>>
>>>
>>> >  self.parse_log()
>>> > --
>>> > 2.37.3
>>> >
>>>



Re: [PATCH maybe-7.2 1/3] hw/i2c: only schedule pending master when bus is idle

2022-11-16 Thread Klaus Jensen
On Nov 17 07:56, Cédric Le Goater wrote:
> On 11/17/22 07:40, Klaus Jensen wrote:
> > On Nov 16 16:58, Cédric Le Goater wrote:
> > > On 11/16/22 09:43, Klaus Jensen wrote:
> > > > From: Klaus Jensen 
> > > > 
> > > > It is not given that the current master will release the bus after a
> > > > transfer ends. Only schedule a pending master if the bus is idle.
> > > > 
> > > > Fixes: 37fa5ca42623 ("hw/i2c: support multiple masters")
> > > > Signed-off-by: Klaus Jensen 
> > > > ---
> > > >hw/i2c/aspeed_i2c.c  |  2 ++
> > > >hw/i2c/core.c| 37 ++---
> > > >include/hw/i2c/i2c.h |  2 ++
> > > >3 files changed, 26 insertions(+), 15 deletions(-)
> > > > 
> > > > diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c
> > > > index c166fd20fa11..1f071a3811f7 100644
> > > > --- a/hw/i2c/aspeed_i2c.c
> > > > +++ b/hw/i2c/aspeed_i2c.c
> > > > @@ -550,6 +550,8 @@ static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus 
> > > > *bus, uint64_t value)
> > > >}
> > > >SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_STOP_CMD, 0);
> > > >aspeed_i2c_set_state(bus, I2CD_IDLE);
> > > > +
> > > > +i2c_schedule_pending_master(bus->bus);
> > > 
> > > Shouldn't it be i2c_bus_release() ?
> > > 
> > 
> > The reason for having both i2c_bus_release() and
> > i2c_schedule_pending_master() is that i2c_bus_release() sort of pairs
> > with i2c_bus_master(). They either set or clear the bus->bh member.
> > 
> > In the current design, the controller (in this case the Aspeed I2C) is
> > an "implicit" master (it does not have a bottom half driving it), so
> > there is no bus->bh to clear.
> > 
> > I should (and will) write some documentation on the asynchronous API.
> 
> I found the routine names confusing. Thanks for the clarification.
> 
> Maybe we could do this rename  :
> 
>   i2c_bus_release() -> i2c_bus_release_and_clear()
>   i2c_schedule_pending_master() -> i2c_bus_release()
> 
> and keep i2c_schedule_pending_master() internal to the I2C core subsystem.
> 

How about renaming i2c_bus_master to i2c_bus_acquire() such that it
pairs with i2c_bus_release().

And then add an i2c_bus_yield() to be used by the controller? I think we
should be able to assert in i2c_bus_yield() that bus->bh is NULL. But
I'll take a closer look at that.


signature.asc
Description: PGP signature


Re: [PATCH v3 2/2] nvme: Add physical writes/reads from OCP log

2022-11-16 Thread Klaus Jensen
On Nov 16 18:14, Joel Granados wrote:
> In order to evaluate write amplification factor (WAF) within the storage
> stack it is important to know the number of bytes written to the
> controller. The existing SMART log value of Data Units Written is too
> coarse (given in units of 500 Kb) and so we add the SMART health
> information extended from the OCP specification (given in units of bytes)
> 
> We add a controller argument (ocp) that toggles on/off the SMART log
> extended structure.  To accommodate different vendor specific specifications
> like OCP, we add a multiplexing function (nvme_vendor_specific_log) which
> will route to the different log functions based on arguments and log ids.
> We only return the OCP extended SMART log when the command is 0xC0 and ocp
> has been turned on in the args.
> 
> Though we add the whole nvme SMART log extended structure, we only populate
> the physical_media_units_{read,written}, log_page_version and
> log_page_uuid.
> 
> Signed-off-by: Joel Granados 
> ---
>  docs/system/devices/nvme.rst |  7 +
>  hw/nvme/ctrl.c   | 55 
>  hw/nvme/nvme.h   |  1 +
>  include/block/nvme.h | 36 +++
>  4 files changed, 99 insertions(+)
> 
> diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst
> index 30f841ef62..1cc5e52c00 100644
> --- a/docs/system/devices/nvme.rst
> +++ b/docs/system/devices/nvme.rst
> @@ -53,6 +53,13 @@ parameters.
>Vendor ID. Set this to ``on`` to revert to the unallocated Intel ID
>previously used.
>  
> +``ocp`` (default: ``off``)
> +  The Open Compute Project defines the Datacenter NVMe SSD Specification that
> +  sits on top of NVMe. It describes additional commands and NVMe behaviors
> +  specific for the Datacenter. When this option is ``on`` OCP features such 
> as
> +  the SMART / Health information extended log become available in the
> +  controller.
> +
>  Additional Namespaces
>  -
>  
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index bf291f7ffe..c7215a4ed1 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -4455,6 +4455,41 @@ static void nvme_set_blk_stats(NvmeNamespace *ns, 
> struct nvme_stats *stats)
>  stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE];
>  }
>  
> +static uint16_t nvme_ocp_extended_smart_info(NvmeCtrl *n, uint8_t rae,
> + uint32_t buf_len, uint64_t off,
> + NvmeRequest *req)
> +{
> +NvmeNamespace *ns = NULL;
> +NvmeSmartLogExtended smart_l = { 0 };
> +struct nvme_stats stats = { 0 };
> +uint32_t trans_len;
> +
> +if (off >= sizeof(smart_l)) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +/* accumulate all stats from all namespaces */
> +for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
> +ns = nvme_ns(n, i);
> +if (ns) {
> +nvme_set_blk_stats(ns, &stats);
> +}
> +}
> +
> +smart_l.physical_media_units_written[0] = 
> cpu_to_le32(stats.units_written);
> +smart_l.physical_media_units_read[0] = cpu_to_le32(stats.units_read);

These are uint64s, so should be cpu_to_le64().

> +smart_l.log_page_version = 0x0003;
> +smart_l.log_page_uuid[0] = 0xA4F2BFEA2810AFC5;
> +smart_l.log_page_uuid[1] = 0xAFD514C97C6F4F9C;

Technically the field is called the "Log Page GUID", not the UUID.
Perhaps this is a bit of Microsoft leaking in, or it is to differentiate
it from the UUID Index functionality, who knows.

It looks like you byte swapped the two 64 bit parts, but not the
individual bytes. It's super confusing when the spec just says "shall be
set to VALUE". Is that VALUE already in little endian? Sigh.

Anyway, I think it is fair to assume that, so just make
log_page_uuid/guid a uint8_t 16-array and do something like:

static const uint8_t uuid[16] = {
0xAF, 0xD5, 0x14, 0xC9, 0x7C, 0x6F, 0x4F, 0x9C,
0xA4, 0xF2, 0xBF, 0xEA, 0x28, 0x10, 0xAF, 0xC5,
};

memcpy(smart_l.log_page_guid, uuid, sizeof(smart_l.log_page_guid));


signature.asc
Description: PGP signature


[PATCH v3 4/9] target/riscv: add support for Zcd extension

2022-11-16 Thread Weiwei Li
Separate c_fld/c_fsd from fld/fsd to add additional check for
c.fld{sp}/c.fsd{sp} which is useful for zcmp/zcmt to reuse
their encodings

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
Reviewed-by: Richard Henderson 
---
 target/riscv/insn16.decode  |  8 
 target/riscv/insn_trans/trans_rvd.c.inc | 18 ++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/target/riscv/insn16.decode b/target/riscv/insn16.decode
index f3ea650325..b62664b6af 100644
--- a/target/riscv/insn16.decode
+++ b/target/riscv/insn16.decode
@@ -97,12 +97,12 @@
 }
 {
   lq  001  ... ... .. ... 00 @cl_q
-  fld 001  ... ... .. ... 00 @cl_d
+  c_fld   001  ... ... .. ... 00 @cl_d
 }
 lw010  ... ... .. ... 00 @cl_w
 {
   sq  101  ... ... .. ... 00 @cs_q
-  fsd 101  ... ... .. ... 00 @cs_d
+  c_fsd   101  ... ... .. ... 00 @cs_d
 }
 sw110  ... ... .. ... 00 @cs_w
 
@@ -148,7 +148,7 @@ addw  100 1 11 ... 01 ... 01 @cs_2
 slli  000 .  .  . 10 @c_shift2
 {
   lq  001  ... ... .. ... 10 @c_lqsp
-  fld 001 .  .  . 10 @c_ldsp
+  c_fld   001 .  .  . 10 @c_ldsp
 }
 {
   illegal 010 -  0  - 10 # c.lwsp, RES rd=0
@@ -166,7 +166,7 @@ slli  000 .  .  . 10 @c_shift2
 }
 {
   sq  101  ... ... .. ... 10 @c_sqsp
-  fsd 101   ..  . 10 @c_sdsp
+  c_fsd   101   ..  . 10 @c_sdsp
 }
 sw110 .  .  . 10 @c_swsp
 
diff --git a/target/riscv/insn_trans/trans_rvd.c.inc 
b/target/riscv/insn_trans/trans_rvd.c.inc
index 1397c1ce1c..def0d7abfe 100644
--- a/target/riscv/insn_trans/trans_rvd.c.inc
+++ b/target/riscv/insn_trans/trans_rvd.c.inc
@@ -31,6 +31,12 @@
 } \
 } while (0)
 
+#define REQUIRE_ZCD(ctx) do { \
+if (!ctx->cfg_ptr->ext_zcd) {  \
+return false; \
+} \
+} while (0)
+
 static bool trans_fld(DisasContext *ctx, arg_fld *a)
 {
 TCGv addr;
@@ -57,6 +63,18 @@ static bool trans_fsd(DisasContext *ctx, arg_fsd *a)
 return true;
 }
 
+static bool trans_c_fld(DisasContext *ctx, arg_fld *a)
+{
+REQUIRE_ZCD(ctx);
+return trans_fld(ctx, a);
+}
+
+static bool trans_c_fsd(DisasContext *ctx, arg_fsd *a)
+{
+REQUIRE_ZCD(ctx);
+return trans_fsd(ctx, a);
+}
+
 static bool trans_fmadd_d(DisasContext *ctx, arg_fmadd_d *a)
 {
 REQUIRE_FPU;
-- 
2.25.1




[PATCH v3 8/9] target/riscv: expose properties for Zc* extension

2022-11-16 Thread Weiwei Li
Expose zca,zcb,zcf,zcd,zcmp,zcmt properties

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
Reviewed-by: Alistair Francis 
---
 target/riscv/cpu.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 1ab04ab246..b9e41df96c 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -81,6 +81,12 @@ static const struct isa_ext_data isa_edata_arr[] = {
 ISA_EXT_DATA_ENTRY(zfhmin, true, PRIV_VERSION_1_12_0, ext_zfhmin),
 ISA_EXT_DATA_ENTRY(zfinx, true, PRIV_VERSION_1_12_0, ext_zfinx),
 ISA_EXT_DATA_ENTRY(zdinx, true, PRIV_VERSION_1_12_0, ext_zdinx),
+ISA_EXT_DATA_ENTRY(zca, true, PRIV_VERSION_1_12_0, ext_zca),
+ISA_EXT_DATA_ENTRY(zcb, true, PRIV_VERSION_1_12_0, ext_zcb),
+ISA_EXT_DATA_ENTRY(zcf, true, PRIV_VERSION_1_12_0, ext_zcf),
+ISA_EXT_DATA_ENTRY(zcd, true, PRIV_VERSION_1_12_0, ext_zcd),
+ISA_EXT_DATA_ENTRY(zcmp, true, PRIV_VERSION_1_12_0, ext_zcmp),
+ISA_EXT_DATA_ENTRY(zcmt, true, PRIV_VERSION_1_12_0, ext_zcmt),
 ISA_EXT_DATA_ENTRY(zba, true, PRIV_VERSION_1_12_0, ext_zba),
 ISA_EXT_DATA_ENTRY(zbb, true, PRIV_VERSION_1_12_0, ext_zbb),
 ISA_EXT_DATA_ENTRY(zbc, true, PRIV_VERSION_1_12_0, ext_zbc),
@@ -1114,6 +1120,13 @@ static Property riscv_cpu_extensions[] = {
 
 /* These are experimental so mark with 'x-' */
 DEFINE_PROP_BOOL("x-j", RISCVCPU, cfg.ext_j, false),
+
+DEFINE_PROP_BOOL("x-zca", RISCVCPU, cfg.ext_zca, false),
+DEFINE_PROP_BOOL("x-zcb", RISCVCPU, cfg.ext_zcb, false),
+DEFINE_PROP_BOOL("x-zcd", RISCVCPU, cfg.ext_zcd, false),
+DEFINE_PROP_BOOL("x-zcf", RISCVCPU, cfg.ext_zcf, false),
+DEFINE_PROP_BOOL("x-zcmp", RISCVCPU, cfg.ext_zcmp, false),
+DEFINE_PROP_BOOL("x-zcmt", RISCVCPU, cfg.ext_zcmt, false),
 /* ePMP 0.9.3 */
 DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
 DEFINE_PROP_BOOL("x-smaia", RISCVCPU, cfg.ext_smaia, false),
-- 
2.25.1




[PATCH v3 0/9] support subsets of code size reduction extension

2022-11-16 Thread Weiwei Li
This patchset implements RISC-V Zc* extension v1.0.0.RC5.7 version 
instructions. 

Specification:
https://github.com/riscv/riscv-code-size-reduction/tree/main/Zc-specification

The port is available here:
https://github.com/plctlab/plct-qemu/tree/plct-zce-upstream-v3

To test Zc* implementation, specify cpu argument with 
"x-zca=true,x-zcb=true,x-zcf=true,f=true" and "x-zcd=true,d=true" (or 
"x-zcmp=true,x-zcmt=true" with c or d=false) to enable Zca/Zcb/Zcf and Zcd(or 
Zcmp,Zcmt) extension support. 

This implementation can pass the basic zc tests from 
https://github.com/yulong-plct/zc-test

v3:
* update the solution for Zcf to the way of Zcd
* update Zcb to reuse gen_load/store
* use trans function instead of helper for push/pop

v2:
* add check for relationship between Zca/Zcf/Zcd with C/F/D based on related 
discussion in review of Zc* spec
* separate c.fld{sp}/fsd{sp} with fld{sp}/fsd{sp} before support of zcmp/zcmt

Weiwei Li (9):
  target/riscv: add cfg properties for Zc* extension
  target/riscv: add support for Zca extension
  target/riscv: add support for Zcf extension
  target/riscv: add support for Zcd extension
  target/riscv: add support for Zcb extension
  target/riscv: add support for Zcmp extension
  target/riscv: add support for Zcmt extension
  target/riscv: expose properties for Zc* extension
  disas/riscv.c: add disasm support for Zc*

 disas/riscv.c | 287 -
 target/riscv/cpu.c|  56 
 target/riscv/cpu.h|   8 +
 target/riscv/cpu_bits.h   |   7 +
 target/riscv/csr.c|  35 +++
 target/riscv/helper.h |   3 +
 target/riscv/insn16.decode|  63 +++-
 target/riscv/insn_trans/trans_rvd.c.inc   |  18 ++
 target/riscv/insn_trans/trans_rvf.c.inc   |  26 +-
 target/riscv/insn_trans/trans_rvi.c.inc   |   4 +-
 target/riscv/insn_trans/trans_rvzce.c.inc | 367 ++
 target/riscv/insn_trans/trans_rvzfh.c.inc |   6 +-
 target/riscv/machine.c|  19 ++
 target/riscv/meson.build  |   3 +-
 target/riscv/translate.c  |  15 +-
 target/riscv/zce_helper.c |  57 
 16 files changed, 953 insertions(+), 21 deletions(-)
 create mode 100644 target/riscv/insn_trans/trans_rvzce.c.inc
 create mode 100644 target/riscv/zce_helper.c

-- 
2.25.1




[PATCH v3 9/9] disas/riscv.c: add disasm support for Zc*

2022-11-16 Thread Weiwei Li
Zcmp/Zcmt instructions will override disasm for c.fld*/c.fsd*
instructions currently

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
---
 disas/riscv.c | 287 +-
 1 file changed, 286 insertions(+), 1 deletion(-)

diff --git a/disas/riscv.c b/disas/riscv.c
index d216b9c39b..81369063b5 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -163,6 +163,13 @@ typedef enum {
 rv_codec_v_i,
 rv_codec_vsetvli,
 rv_codec_vsetivli,
+rv_codec_zcb_ext,
+rv_codec_zcb_mul,
+rv_codec_zcb_lb,
+rv_codec_zcb_lh,
+rv_codec_zcmp_cm_pushpop,
+rv_codec_zcmp_cm_mv,
+rv_codec_zcmt_jt,
 } rv_codec;
 
 typedef enum {
@@ -935,6 +942,26 @@ typedef enum {
 rv_op_vsetvli = 766,
 rv_op_vsetivli = 767,
 rv_op_vsetvl = 768,
+rv_op_c_zext_b = 769,
+rv_op_c_sext_b = 770,
+rv_op_c_zext_h = 771,
+rv_op_c_sext_h = 772,
+rv_op_c_zext_w = 773,
+rv_op_c_not = 774,
+rv_op_c_mul = 775,
+rv_op_c_lbu = 776,
+rv_op_c_lhu = 777,
+rv_op_c_lh = 778,
+rv_op_c_sb = 779,
+rv_op_c_sh = 780,
+rv_op_cm_push = 781,
+rv_op_cm_pop = 782,
+rv_op_cm_popret = 783,
+rv_op_cm_popretz = 784,
+rv_op_cm_mva01s = 785,
+rv_op_cm_mvsa01 = 786,
+rv_op_cm_jt = 787,
+rv_op_cm_jalt = 788,
 } rv_op;
 
 /* structures */
@@ -958,6 +985,7 @@ typedef struct {
 uint8_t   rnum;
 uint8_t   vm;
 uint32_t  vzimm;
+uint8_t   rlist;
 } rv_decode;
 
 typedef struct {
@@ -1070,6 +1098,10 @@ static const char rv_vreg_name_sym[32][4] = {
 #define rv_fmt_vd_vm  "O\tDm"
 #define rv_fmt_vsetvli"O\t0,1,v"
 #define rv_fmt_vsetivli   "O\t0,u,v"
+#define rv_fmt_rs1_rs2_zce_ldst   "O\t2,i(1)"
+#define rv_fmt_push_rlist "O\tx,-i"
+#define rv_fmt_pop_rlist  "O\tx,i"
+#define rv_fmt_zcmt_index "O\ti"
 
 /* pseudo-instruction constraints */
 
@@ -2065,7 +2097,27 @@ const rv_opcode_data opcode_data[] = {
 { "vsext.vf8", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, rv_op_vsext_vf8, 
rv_op_vsext_vf8, 0 },
 { "vsetvli", rv_codec_vsetvli, rv_fmt_vsetvli, NULL, rv_op_vsetvli, 
rv_op_vsetvli, 0 },
 { "vsetivli", rv_codec_vsetivli, rv_fmt_vsetivli, NULL, rv_op_vsetivli, 
rv_op_vsetivli, 0 },
-{ "vsetvl", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, rv_op_vsetvl, 
rv_op_vsetvl, 0 }
+{ "vsetvl", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, rv_op_vsetvl, 
rv_op_vsetvl, 0 },
+{ "c.zext.b", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
+{ "c.sext.b", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
+{ "c.zext.h", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
+{ "c.sext.h", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
+{ "c.zext.w", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
+{ "c.not", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
+{ "c.mul", rv_codec_zcb_mul, rv_fmt_rd_rs2, NULL, 0, 0 },
+{ "c.lbu", rv_codec_zcb_lb, rv_fmt_rs1_rs2_zce_ldst, NULL, 0, 0, 0 },
+{ "c.lhu", rv_codec_zcb_lh, rv_fmt_rs1_rs2_zce_ldst, NULL, 0, 0, 0 },
+{ "c.lh", rv_codec_zcb_lh, rv_fmt_rs1_rs2_zce_ldst, NULL, 0, 0, 0 },
+{ "c.sb", rv_codec_zcb_lb, rv_fmt_rs1_rs2_zce_ldst, NULL, 0, 0, 0 },
+{ "c.sh", rv_codec_zcb_lh, rv_fmt_rs1_rs2_zce_ldst, NULL, 0, 0, 0 },
+{ "cm.push", rv_codec_zcmp_cm_pushpop, rv_fmt_push_rlist, NULL, 0, 0 },
+{ "cm.pop", rv_codec_zcmp_cm_pushpop, rv_fmt_pop_rlist, NULL, 0, 0 },
+{ "cm.popret", rv_codec_zcmp_cm_pushpop, rv_fmt_pop_rlist, NULL, 0, 0, 0 },
+{ "cm.popretz", rv_codec_zcmp_cm_pushpop, rv_fmt_pop_rlist, NULL, 0, 0 },
+{ "cm.mva01s", rv_codec_zcmp_cm_mv, rv_fmt_rd_rs2, NULL, 0, 0, 0 },
+{ "cm.mvsa01", rv_codec_zcmp_cm_mv, rv_fmt_rd_rs2, NULL, 0, 0, 0 },
+{ "cm.jt", rv_codec_zcmt_jt, rv_fmt_zcmt_index, NULL, 0 },
+{ "cm.jalt", rv_codec_zcmt_jt, rv_fmt_zcmt_index, NULL, 0 },
 };
 
 /* CSR names */
@@ -2084,6 +2136,7 @@ static const char *csr_name(int csrno)
 case 0x000a: return "vxrm";
 case 0x000f: return "vcsr";
 case 0x0015: return "seed";
+case 0x0017: return "jvt";
 case 0x0040: return "uscratch";
 case 0x0041: return "uepc";
 case 0x0042: return "ucause";
@@ -2306,6 +2359,24 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa 
isa)
 op = rv_op_c_ld;
 }
 break;
+case 4:
+switch ((inst >> 10) & 0b111) {
+case 0: op = rv_op_c_lbu; break;
+case 1:
+if (((inst >> 6) & 1) == 0) {
+op = rv_op_c_lhu;
+} else {
+op = rv_op_c_lh;
+}
+break;
+case 2: op = rv_op_c_sb; break;
+case 3:
+if (((inst >> 6) & 1) == 0) {
+op = rv_op_c_sh;
+}
+break;
+}
+break;
 case 5:
 if (isa == rv128) {
 op = rv_op_c_sq;
@@ -2362,6 +2433,17 @@ static 

[PATCH v3 7/9] target/riscv: add support for Zcmt extension

2022-11-16 Thread Weiwei Li
Add encode, trans* functions and helper functions support for Zcmt
instructions
Add support for jvt csr

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
---
 target/riscv/cpu.h|  2 +
 target/riscv/cpu_bits.h   |  7 +++
 target/riscv/csr.c| 35 ++
 target/riscv/helper.h |  3 ++
 target/riscv/insn16.decode|  7 ++-
 target/riscv/insn_trans/trans_rvf.c.inc   |  8 ++--
 target/riscv/insn_trans/trans_rvzce.c.inc | 29 +++-
 target/riscv/insn_trans/trans_rvzfh.c.inc |  6 +--
 target/riscv/machine.c| 19 
 target/riscv/meson.build  |  3 +-
 target/riscv/zce_helper.c | 57 +++
 11 files changed, 166 insertions(+), 10 deletions(-)
 create mode 100644 target/riscv/zce_helper.c

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 6e915b6937..0f9fffab2f 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -181,6 +181,8 @@ struct CPUArchState {
 
 uint32_t features;
 
+target_ulong jvt;
+
 #ifdef CONFIG_USER_ONLY
 uint32_t elf_flags;
 #endif
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 8b0d7e20ea..ce347e5575 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -319,6 +319,7 @@
 #define SMSTATEEN_MAX_COUNT 4
 #define SMSTATEEN0_CS   (1ULL << 0)
 #define SMSTATEEN0_FCSR (1ULL << 1)
+#define SMSTATEEN0_JVT  (1ULL << 2)
 #define SMSTATEEN0_HSCONTXT (1ULL << 57)
 #define SMSTATEEN0_IMSIC(1ULL << 58)
 #define SMSTATEEN0_AIA  (1ULL << 59)
@@ -523,6 +524,9 @@
 /* Crypto Extension */
 #define CSR_SEED0x015
 
+/* Zcmt Extension */
+#define CSR_JVT 0x017
+
 /* mstatus CSR bits */
 #define MSTATUS_UIE 0x0001
 #define MSTATUS_SIE 0x0002
@@ -894,4 +898,7 @@ typedef enum RISCVException {
 #define MHPMEVENT_IDX_MASK 0xF
 #define MHPMEVENT_SSCOF_RESVD  16
 
+/* JVT CSR bits */
+#define JVT_MODE   0x3F
+#define JVT_BASE   (~0x3F)
 #endif
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 8b25f885ec..901da42b53 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -167,6 +167,24 @@ static RISCVException ctr32(CPURISCVState *env, int csrno)
 return ctr(env, csrno);
 }
 
+static RISCVException zcmt(CPURISCVState *env, int csrno)
+{
+RISCVCPU *cpu = env_archcpu(env);
+
+if (!cpu->cfg.ext_zcmt) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+#if !defined(CONFIG_USER_ONLY)
+RISCVException ret = smstateen_acc_ok(env, 0, SMSTATEEN0_JVT);
+if (ret != RISCV_EXCP_NONE) {
+return ret;
+}
+#endif
+
+return RISCV_EXCP_NONE;
+}
+
 #if !defined(CONFIG_USER_ONLY)
 static RISCVException mctr(CPURISCVState *env, int csrno)
 {
@@ -3987,6 +4005,20 @@ RISCVException riscv_csrrw_debug(CPURISCVState *env, int 
csrno,
 return ret;
 }
 
+static RISCVException read_jvt(CPURISCVState *env, int csrno,
+   target_ulong *val)
+{
+*val = env->jvt;
+return RISCV_EXCP_NONE;
+}
+
+static RISCVException write_jvt(CPURISCVState *env, int csrno,
+target_ulong val)
+{
+env->jvt = val;
+return RISCV_EXCP_NONE;
+}
+
 /* Control and Status Register function table */
 riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
 /* User Floating-Point CSRs */
@@ -4024,6 +4056,9 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
 /* Crypto Extension */
 [CSR_SEED] = { "seed", seed, NULL, NULL, rmw_seed },
 
+/* Zcmt Extension */
+[CSR_JVT] = {"jvt", zcmt, read_jvt, write_jvt},
+
 #if !defined(CONFIG_USER_ONLY)
 /* Machine Timers and Counters */
 [CSR_MCYCLE]= { "mcycle",any,   read_hpmcounter,
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 227c7122ef..2ae98f04d2 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1136,3 +1136,6 @@ DEF_HELPER_FLAGS_1(aes64im, TCG_CALL_NO_RWG_SE, tl, tl)
 
 DEF_HELPER_FLAGS_3(sm4ed, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl)
 DEF_HELPER_FLAGS_3(sm4ks, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl)
+
+/* Zce helper */
+DEF_HELPER_3(cm_jalt, tl, env, tl, tl)
diff --git a/target/riscv/insn16.decode b/target/riscv/insn16.decode
index 4654c23052..c359c574ab 100644
--- a/target/riscv/insn16.decode
+++ b/target/riscv/insn16.decode
@@ -49,6 +49,7 @@
 %zcb_h_uimm  5:1 !function=ex_shift_1
 %zcmp_spimm  2:2 !function=ex_shift_4
 %zcmp_rlist  4:4
+%zcmt_index  2:8
 
 # Argument sets imported from insn32.decode:
   !extern
@@ -63,6 +64,7 @@
 _s  rs1 rs2  !extern
 
   zcmp_rlist zcmp_spimm
+  zcmt_index
 
 # Formats 16:
 @cr  . .  ..   rs2=%rs2_5   rs1=%rd %rd
@@ -106,6 +108,7 @@
 @zcb_sh   ... . .. ... .. ... ..imm=%zcb_h_uimm  rs1=%rs1_3 
rs2=%rs2_3
 @zcmp ... 

[PATCH v3 1/9] target/riscv: add cfg properties for Zc* extension

2022-11-16 Thread Weiwei Li
Add properties for Zca,Zcb,Zcf,Zcd,Zcmp,Zcmt extension
Add check for these properties

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
Cc: Alistair Francis 
---
 target/riscv/cpu.c | 43 +++
 target/riscv/cpu.h |  6 ++
 2 files changed, 49 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 042fd541b4..1ab04ab246 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -805,6 +805,49 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 }
 }
 
+if (cpu->cfg.ext_c) {
+cpu->cfg.ext_zca = true;
+if (cpu->cfg.ext_f && env->misa_mxl_max == MXL_RV32) {
+cpu->cfg.ext_zcf = true;
+}
+if (cpu->cfg.ext_d) {
+cpu->cfg.ext_zcd = true;
+}
+}
+
+if (env->misa_mxl_max != MXL_RV32 && cpu->cfg.ext_zcf) {
+error_setg(errp, "Zcf extension is only relevant to RV32");
+return;
+}
+
+if (!cpu->cfg.ext_f && cpu->cfg.ext_zcf) {
+error_setg(errp, "Zcf extension requires F extension");
+return;
+}
+
+if (!cpu->cfg.ext_d && cpu->cfg.ext_zcd) {
+error_setg(errp, "Zcd extensionrequires D extension");
+return;
+}
+
+if ((cpu->cfg.ext_zcf || cpu->cfg.ext_zcd || cpu->cfg.ext_zcb ||
+ cpu->cfg.ext_zcmp || cpu->cfg.ext_zcmt) && !cpu->cfg.ext_zca) {
+error_setg(errp, "Zcf/Zcd/Zcb/Zcmp/Zcmt extensions require Zca "
+ "extension");
+return;
+}
+
+if (cpu->cfg.ext_zcd && (cpu->cfg.ext_zcmp || cpu->cfg.ext_zcmt)) {
+error_setg(errp, "Zcmp/Zcmt extensions are incompatible with "
+ "Zcd extension");
+return;
+}
+
+if (cpu->cfg.ext_zcmt && !cpu->cfg.ext_icsr) {
+error_setg(errp, "Zcmt extension requires Zicsr extension");
+return;
+}
+
 if (cpu->cfg.ext_zk) {
 cpu->cfg.ext_zkn = true;
 cpu->cfg.ext_zkr = true;
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 9bd539d77a..6e915b6937 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -434,6 +434,12 @@ struct RISCVCPUConfig {
 bool ext_zbkc;
 bool ext_zbkx;
 bool ext_zbs;
+bool ext_zca;
+bool ext_zcb;
+bool ext_zcd;
+bool ext_zcf;
+bool ext_zcmp;
+bool ext_zcmt;
 bool ext_zk;
 bool ext_zkn;
 bool ext_zknd;
-- 
2.25.1




[PATCH v3 2/9] target/riscv: add support for Zca extension

2022-11-16 Thread Weiwei Li
Modify the check for C extension to Zca (C implies Zca)

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
---
 target/riscv/insn_trans/trans_rvi.c.inc | 4 ++--
 target/riscv/translate.c| 8 ++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvi.c.inc 
b/target/riscv/insn_trans/trans_rvi.c.inc
index 5c69b88d1e..0d73b919ce 100644
--- a/target/riscv/insn_trans/trans_rvi.c.inc
+++ b/target/riscv/insn_trans/trans_rvi.c.inc
@@ -56,7 +56,7 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a)
 tcg_gen_andi_tl(cpu_pc, cpu_pc, (target_ulong)-2);
 
 gen_set_pc(ctx, cpu_pc);
-if (!has_ext(ctx, RVC)) {
+if (!ctx->cfg_ptr->ext_zca) {
 TCGv t0 = tcg_temp_new();
 
 misaligned = gen_new_label();
@@ -178,7 +178,7 @@ static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond 
cond)
 
 gen_set_label(l); /* branch taken */
 
-if (!has_ext(ctx, RVC) && ((ctx->base.pc_next + a->imm) & 0x3)) {
+if (!ctx->cfg_ptr->ext_zca && ((ctx->base.pc_next + a->imm) & 0x3)) {
 /* misaligned */
 gen_exception_inst_addr_mis(ctx);
 } else {
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 2ab8772ebe..ee24b451e3 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -557,7 +557,7 @@ static void gen_jal(DisasContext *ctx, int rd, target_ulong 
imm)
 
 /* check misaligned: */
 next_pc = ctx->base.pc_next + imm;
-if (!has_ext(ctx, RVC)) {
+if (!ctx->cfg_ptr->ext_zca) {
 if ((next_pc & 0x3) != 0) {
 gen_exception_inst_addr_mis(ctx);
 return;
@@ -1097,7 +1097,11 @@ static void decode_opc(CPURISCVState *env, DisasContext 
*ctx, uint16_t opcode)
 ctx->virt_inst_excp = false;
 /* Check for compressed insn */
 if (insn_len(opcode) == 2) {
-if (!has_ext(ctx, RVC)) {
+/*
+ * Zca support all of the existing C extension, excluding all
+ * compressed floating point loads and stores
+ */
+if (!ctx->cfg_ptr->ext_zca) {
 gen_exception_illegal(ctx);
 } else {
 ctx->opcode = opcode;
-- 
2.25.1




[PATCH v3 6/9] target/riscv: add support for Zcmp extension

2022-11-16 Thread Weiwei Li
Add encode, trans* functions for Zcmp instructions

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
---
 target/riscv/insn16.decode|  18 ++
 target/riscv/insn_trans/trans_rvzce.c.inc | 242 +-
 target/riscv/translate.c  |   5 +
 3 files changed, 264 insertions(+), 1 deletion(-)

diff --git a/target/riscv/insn16.decode b/target/riscv/insn16.decode
index 47603ec1e0..4654c23052 100644
--- a/target/riscv/insn16.decode
+++ b/target/riscv/insn16.decode
@@ -21,6 +21,8 @@
 %rs1_3 7:3!function=ex_rvc_register
 %rs2_3 2:3!function=ex_rvc_register
 %rs2_5 2:5
+%sreg1 7:3!function=ex_sreg_register
+%sreg2 2:3!function=ex_sreg_register
 
 # Immediates:
 %imm_ci12:s1 2:5
@@ -45,6 +47,8 @@
 
 %zcb_b_uimm  5:1 6:1
 %zcb_h_uimm  5:1 !function=ex_shift_1
+%zcmp_spimm  2:2 !function=ex_shift_4
+%zcmp_rlist  4:4
 
 # Argument sets imported from insn32.decode:
   !extern
@@ -56,7 +60,9 @@
  imm rd   !extern
  shamt rs1 rd !extern
 rd rs1   !extern
+_s  rs1 rs2  !extern
 
+  zcmp_rlist zcmp_spimm
 
 # Formats 16:
 @cr  . .  ..   rs2=%rs2_5   rs1=%rd %rd
@@ -98,6 +104,8 @@
 @zcb_lh   ... . .. ... .. ... ..imm=%zcb_h_uimm  rs1=%rs1_3 rd=%rs2_3
 @zcb_sb   ... . .. ... .. ... ..imm=%zcb_b_uimm  rs1=%rs1_3 
rs2=%rs2_3
 @zcb_sh   ... . .. ... .. ... ..imm=%zcb_h_uimm  rs1=%rs1_3 
rs2=%rs2_3
+@zcmp ... ...     ..%zcmp_rlist   %zcmp_spimm
+@cm_mv... ...  ... .. ... ..  _s  rs2=%sreg2rs1=%sreg1
 
 # *** RV32/64C Standard Extension (Quadrant 0) ***
 {
@@ -177,6 +185,16 @@ slli  000 .  .  . 10 @c_shift2
 {
   sq  101  ... ... .. ... 10 @c_sqsp
   c_fsd   101   ..  . 10 @c_sdsp
+
+  # *** RV64 and RV32 Zcmp Extension ***
+  [
+cm_push 101  11000   .. 10 @zcmp
+cm_pop  101  11010   .. 10 @zcmp
+cm_popret   101  0   .. 10 @zcmp
+cm_popretz  101  11100   .. 10 @zcmp
+cm_mva01s   101  011 ... 11 ... 10 @cm_mv
+cm_mvsa01   101  011 ... 01 ... 10 @cm_mv
+  ]
 }
 sw110 .  .  . 10 @c_swsp
 
diff --git a/target/riscv/insn_trans/trans_rvzce.c.inc 
b/target/riscv/insn_trans/trans_rvzce.c.inc
index de96c4afaf..f45224e388 100644
--- a/target/riscv/insn_trans/trans_rvzce.c.inc
+++ b/target/riscv/insn_trans/trans_rvzce.c.inc
@@ -1,5 +1,5 @@
 /*
- * RISC-V translation routines for the Zcb Standard Extension.
+ * RISC-V translation routines for the Zc[b,mp] Standard Extension.
  *
  * Copyright (c) 2021-2022 PLCT Lab
  *
@@ -21,6 +21,11 @@
 return false;   \
 } while (0)
 
+#define REQUIRE_ZCMP(ctx) do {   \
+if (!ctx->cfg_ptr->ext_zcmp) \
+return false;\
+} while (0)
+
 static bool trans_c_zext_b(DisasContext *ctx, arg_c_zext_b *a)
 {
 REQUIRE_ZCB(ctx);
@@ -98,3 +103,238 @@ static bool trans_c_sh(DisasContext *ctx, arg_c_sh *a)
 REQUIRE_ZCB(ctx);
 return gen_store(ctx, a, MO_UW);
 }
+
+static bool gen_zcmp_check(DisasContext *ctx, arg_zcmp *a)
+{
+/* rlist 0 to 3 are reserved for future EABI variant */
+if (a->zcmp_rlist < 4) {
+return false;
+}
+
+/* rlist <= 6 when RV32E/RV64E */
+if (ctx->cfg_ptr->ext_e && a->zcmp_rlist > 6) {
+return false;
+}
+
+return true;
+}
+
+#define X_S08
+#define X_S19
+#define X_Sn16
+
+static inline void update_push_pop_list(target_ulong rlist, bool *xreg_list)
+{
+switch (rlist) {
+case 15:
+xreg_list[X_Sn + 11] = true;
+xreg_list[X_Sn + 10] = true;
+/* FALL THROUGH */
+case 14:
+xreg_list[X_Sn + 9] = true;
+/* FALL THROUGH */
+case 13:
+xreg_list[X_Sn + 8] = true;
+/* FALL THROUGH */
+case 12:
+xreg_list[X_Sn + 7] = true;
+/* FALL THROUGH */
+case 11:
+xreg_list[X_Sn + 6] = true;
+/* FALL THROUGH */
+case 10:
+xreg_list[X_Sn + 5] = true;
+/* FALL THROUGH */
+case 9:
+xreg_list[X_Sn + 4] = true;
+/* FALL THROUGH */
+case 8:
+xreg_list[X_Sn + 3] = true;
+/* FALL THROUGH */
+case 7:
+xreg_list[X_Sn + 2] = true;
+/* FALL THROUGH */
+case 6:
+xreg_list[X_S1] = true;
+/* FALL THROUGH */
+case 5:
+xreg_list[X_S0] = true;
+/* FALL THROUGH */
+case 4:
+xreg_list[xRA] = true;
+break;
+}
+}
+
+static inline target_ulong caculate_stack_adj(int bytes, target_ulong rlist,
+  target_ulong spimm)
+{
+target_ulong stack_adj_base = 0;
+switch (rlist) {
+case 15:
+stack_adj_base = bytes == 4 ? 64 : 112;
+break;
+case 14:
+

[PATCH v3 3/9] target/riscv: add support for Zcf extension

2022-11-16 Thread Weiwei Li
Separate c_flw/c_fsw from flw/fsw to add check for Zcf extension

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
---
 target/riscv/insn16.decode  |  8 
 target/riscv/insn_trans/trans_rvf.c.inc | 18 ++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/target/riscv/insn16.decode b/target/riscv/insn16.decode
index ccfe59f294..f3ea650325 100644
--- a/target/riscv/insn16.decode
+++ b/target/riscv/insn16.decode
@@ -109,11 +109,11 @@ sw110  ... ... .. ... 00 @cs_w
 # *** RV32C and RV64C specific Standard Extension (Quadrant 0) ***
 {
   ld  011  ... ... .. ... 00 @cl_d
-  flw 011  ... ... .. ... 00 @cl_w
+  c_flw   011  ... ... .. ... 00 @cl_w
 }
 {
   sd  111  ... ... .. ... 00 @cs_d
-  fsw 111  ... ... .. ... 00 @cs_w
+  c_fsw   111  ... ... .. ... 00 @cs_w
 }
 
 # *** RV32/64C Standard Extension (Quadrant 1) ***
@@ -174,9 +174,9 @@ sw110 .  .  . 10 @c_swsp
 {
   c64_illegal 011 -  0  - 10 # c.ldsp, RES rd=0
   ld  011 .  .  . 10 @c_ldsp
-  flw 011 .  .  . 10 @c_lwsp
+  c_flw   011 .  .  . 10 @c_lwsp
 }
 {
   sd  111 .  .  . 10 @c_sdsp
-  fsw 111 .  .  . 10 @c_swsp
+  c_fsw   111 .  .  . 10 @c_swsp
 }
diff --git a/target/riscv/insn_trans/trans_rvf.c.inc 
b/target/riscv/insn_trans/trans_rvf.c.inc
index 93657680c6..426518957b 100644
--- a/target/riscv/insn_trans/trans_rvf.c.inc
+++ b/target/riscv/insn_trans/trans_rvf.c.inc
@@ -24,6 +24,12 @@
 return false; \
 } while (0)
 
+#define REQUIRE_ZCF(ctx) do {  \
+if (!ctx->cfg_ptr->ext_zcf) {  \
+return false;  \
+}  \
+} while (0)
+
 #ifndef CONFIG_USER_ONLY
 static inline bool smstateen_fcsr_check(DisasContext *ctx, int index)
 {
@@ -96,6 +102,18 @@ static bool trans_fsw(DisasContext *ctx, arg_fsw *a)
 return true;
 }
 
+static bool trans_c_flw(DisasContext *ctx, arg_flw *a)
+{
+REQUIRE_ZCF(ctx);
+return trans_flw(ctx, a);
+}
+
+static bool trans_c_fsw(DisasContext *ctx, arg_fsw *a)
+{
+REQUIRE_ZCF(ctx);
+return trans_fsw(ctx, a);
+}
+
 static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a)
 {
 REQUIRE_FPU;
-- 
2.25.1




[PATCH v3 5/9] target/riscv: add support for Zcb extension

2022-11-16 Thread Weiwei Li
Add encode and trans* functions support for Zcb instructions

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
---
 target/riscv/insn16.decode|  24 ++
 target/riscv/insn_trans/trans_rvzce.c.inc | 100 ++
 target/riscv/translate.c  |   2 +
 3 files changed, 126 insertions(+)
 create mode 100644 target/riscv/insn_trans/trans_rvzce.c.inc

diff --git a/target/riscv/insn16.decode b/target/riscv/insn16.decode
index b62664b6af..47603ec1e0 100644
--- a/target/riscv/insn16.decode
+++ b/target/riscv/insn16.decode
@@ -43,6 +43,8 @@
 %imm_addi16sp  12:s1 3:2 5:1 2:1 6:1 !function=ex_shift_4
 %imm_lui   12:s1 2:5 !function=ex_shift_12
 
+%zcb_b_uimm  5:1 6:1
+%zcb_h_uimm  5:1 !function=ex_shift_1
 
 # Argument sets imported from insn32.decode:
   !extern
@@ -53,6 +55,7 @@
  imm rs2 rs1  !extern
  imm rd   !extern
  shamt rs1 rd !extern
+rd rs1   !extern
 
 
 # Formats 16:
@@ -89,6 +92,13 @@
 
 @c_andi ... . .. ... . ..  imm=%imm_ci rs1=%rs1_3 rd=%rs1_3
 
+@zcb_unary... ...  ... .. ... ..rs1=%rs1_3 rd=%rs1_3
+@zcb_binary   ... ...  ... .. ... ..rs2=%rs2_3   rs1=%rs1_3 rd=%rs1_3
+@zcb_lb   ... . .. ... .. ... ..imm=%zcb_b_uimm  rs1=%rs1_3 rd=%rs2_3
+@zcb_lh   ... . .. ... .. ... ..imm=%zcb_h_uimm  rs1=%rs1_3 rd=%rs2_3
+@zcb_sb   ... . .. ... .. ... ..imm=%zcb_b_uimm  rs1=%rs1_3 
rs2=%rs2_3
+@zcb_sh   ... . .. ... .. ... ..imm=%zcb_h_uimm  rs1=%rs1_3 
rs2=%rs2_3
+
 # *** RV32/64C Standard Extension (Quadrant 0) ***
 {
   # Opcode of all zeros is illegal; rd != 0, nzuimm == 0 is reserved.
@@ -180,3 +190,17 @@ sw110 .  .  . 10 @c_swsp
   sd  111 .  .  . 10 @c_sdsp
   c_fsw   111 .  .  . 10 @c_swsp
 }
+
+# *** RV64 and RV32 Zcb Extension ***
+c_zext_b  100 111  ... 11 000 01 @zcb_unary
+c_sext_b  100 111  ... 11 001 01 @zcb_unary
+c_zext_h  100 111  ... 11 010 01 @zcb_unary
+c_sext_h  100 111  ... 11 011 01 @zcb_unary
+c_zext_w  100 111  ... 11 100 01 @zcb_unary
+c_not 100 111  ... 11 101 01 @zcb_unary
+c_mul 100 111  ... 10 ... 01 @zcb_binary
+c_lbu 100 000  ... .. ... 00 @zcb_lb
+c_lhu 100 001  ... 0. ... 00 @zcb_lh
+c_lh  100 001  ... 1. ... 00 @zcb_lh
+c_sb  100 010  ... .. ... 00 @zcb_sb
+c_sh  100 011  ... 0. ... 00 @zcb_sh
diff --git a/target/riscv/insn_trans/trans_rvzce.c.inc 
b/target/riscv/insn_trans/trans_rvzce.c.inc
new file mode 100644
index 00..de96c4afaf
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvzce.c.inc
@@ -0,0 +1,100 @@
+/*
+ * RISC-V translation routines for the Zcb Standard Extension.
+ *
+ * Copyright (c) 2021-2022 PLCT Lab
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see .
+ */
+
+#define REQUIRE_ZCB(ctx) do {   \
+if (!ctx->cfg_ptr->ext_zcb) \
+return false;   \
+} while (0)
+
+static bool trans_c_zext_b(DisasContext *ctx, arg_c_zext_b *a)
+{
+REQUIRE_ZCB(ctx);
+return gen_unary(ctx, a, EXT_NONE, tcg_gen_ext8u_tl);
+}
+
+static bool trans_c_zext_h(DisasContext *ctx, arg_c_zext_h *a)
+{
+REQUIRE_ZCB(ctx);
+REQUIRE_ZBB(ctx);
+return gen_unary(ctx, a, EXT_NONE, tcg_gen_ext16u_tl);
+}
+
+static bool trans_c_sext_b(DisasContext *ctx, arg_c_sext_b *a)
+{
+REQUIRE_ZCB(ctx);
+REQUIRE_ZBB(ctx);
+return gen_unary(ctx, a, EXT_NONE, tcg_gen_ext8s_tl);
+}
+
+static bool trans_c_sext_h(DisasContext *ctx, arg_c_sext_h *a)
+{
+REQUIRE_ZCB(ctx);
+REQUIRE_ZBB(ctx);
+return gen_unary(ctx, a, EXT_NONE, tcg_gen_ext16s_tl);
+}
+
+static bool trans_c_zext_w(DisasContext *ctx, arg_c_zext_w *a)
+{
+REQUIRE_64BIT(ctx);
+REQUIRE_ZCB(ctx);
+REQUIRE_ZBA(ctx);
+return gen_unary(ctx, a, EXT_NONE, tcg_gen_ext32u_tl);
+}
+
+static bool trans_c_not(DisasContext *ctx, arg_c_not *a)
+{
+REQUIRE_ZCB(ctx);
+return gen_unary(ctx, a, EXT_NONE, tcg_gen_not_tl);
+}
+
+static bool trans_c_mul(DisasContext *ctx, arg_c_mul *a)
+{
+REQUIRE_ZCB(ctx);
+REQUIRE_M_OR_ZMMUL(ctx);
+return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, NULL);
+}
+
+static bool trans_c_lbu(DisasContext *ctx, arg_c_lbu *a)
+{
+REQUIRE_ZCB(ctx);
+return gen_load(ctx, a, MO_UB);
+}
+
+static bool 

Re: [PATCH maybe-7.2 1/3] hw/i2c: only schedule pending master when bus is idle

2022-11-16 Thread Cédric Le Goater

On 11/17/22 07:40, Klaus Jensen wrote:

On Nov 16 16:58, Cédric Le Goater wrote:

On 11/16/22 09:43, Klaus Jensen wrote:

From: Klaus Jensen 

It is not given that the current master will release the bus after a
transfer ends. Only schedule a pending master if the bus is idle.

Fixes: 37fa5ca42623 ("hw/i2c: support multiple masters")
Signed-off-by: Klaus Jensen 
---
   hw/i2c/aspeed_i2c.c  |  2 ++
   hw/i2c/core.c| 37 ++---
   include/hw/i2c/i2c.h |  2 ++
   3 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c
index c166fd20fa11..1f071a3811f7 100644
--- a/hw/i2c/aspeed_i2c.c
+++ b/hw/i2c/aspeed_i2c.c
@@ -550,6 +550,8 @@ static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus *bus, 
uint64_t value)
   }
   SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_STOP_CMD, 0);
   aspeed_i2c_set_state(bus, I2CD_IDLE);
+
+i2c_schedule_pending_master(bus->bus);


Shouldn't it be i2c_bus_release() ?



The reason for having both i2c_bus_release() and
i2c_schedule_pending_master() is that i2c_bus_release() sort of pairs
with i2c_bus_master(). They either set or clear the bus->bh member.

In the current design, the controller (in this case the Aspeed I2C) is
an "implicit" master (it does not have a bottom half driving it), so
there is no bus->bh to clear.

I should (and will) write some documentation on the asynchronous API.


I found the routine names confusing. Thanks for the clarification.

Maybe we could do this rename  :

  i2c_bus_release() -> i2c_bus_release_and_clear()
  i2c_schedule_pending_master() -> i2c_bus_release()

and keep i2c_schedule_pending_master() internal to the I2C core subsystem.

C.




Re: [PATCH for 8.0 v7 10/10] vdpa: Always start CVQ in SVQ mode if possible

2022-11-16 Thread Jason Wang



在 2022/11/16 23:05, Eugenio Pérez 写道:

Isolate control virtqueue in its own group, allowing to intercept control
commands but letting dataplane run totally passthrough to the guest.

Signed-off-by: Eugenio Pérez 
---
v7:
* Never ask for number of address spaces, just react if isolation is not
   possible.
* Return ASID ioctl errors instead of masking them as if the device has
   no asid.
* Simplify net_init_vhost_vdpa logic
* Add "if possible" suffix

v6:
* Disable control SVQ if the device does not support it because of
features.

v5:
* Fixing the not adding cvq buffers when x-svq=on is specified.
* Move vring state in vhost_vdpa_get_vring_group instead of using a
   parameter.
* Rename VHOST_VDPA_NET_CVQ_PASSTHROUGH to VHOST_VDPA_NET_DATA_ASID

v4:
* Squash vhost_vdpa_cvq_group_is_independent.
* Rebased on last CVQ start series, that allocated CVQ cmd bufs at load
* Do not check for cvq index on vhost_vdpa_net_prepare, we only have that
   one callback registered in that NetClientInfo.

v3:
* Make asid related queries print a warning instead of returning an
   error and stop the start of qemu.
---
  hw/virtio/vhost-vdpa.c |   3 +-
  net/vhost-vdpa.c   | 117 +++--
  2 files changed, 114 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 852baf8b2c..a29a18a6a9 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -653,7 +653,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
  {
  uint64_t features;
  uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
-0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
+0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
+0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
  int r;
  
  if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, )) {

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index a9c864741a..dc13a49311 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -101,6 +101,8 @@ static const uint64_t vdpa_svq_device_features =
  BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
  BIT_ULL(VIRTIO_NET_F_STANDBY);
  
+#define VHOST_VDPA_NET_CVQ_ASID 1

+
  VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
  {
  VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
@@ -242,6 +244,40 @@ static NetClientInfo net_vhost_vdpa_info = {
  .check_peer_type = vhost_vdpa_check_peer_type,
  };
  
+static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)

+{
+struct vhost_vring_state state = {
+.index = vq_index,
+};
+int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, );
+
+if (unlikely(r < 0)) {
+error_report("Cannot get VQ %u group: %s", vq_index,
+ g_strerror(errno));
+return r;
+}
+
+return state.num;
+}
+
+static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
+   unsigned vq_group,
+   unsigned asid_num)
+{
+struct vhost_vring_state asid = {
+.index = vq_group,
+.num = asid_num,
+};
+int r;
+
+r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, );
+if (unlikely(r < 0)) {
+error_report("Can't set vq group %u asid %u, errno=%d (%s)",
+ asid.index, asid.num, errno, g_strerror(errno));
+}
+return r;
+}
+
  static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
  {
  VhostIOVATree *tree = v->iova_tree;
@@ -316,11 +352,69 @@ dma_map_err:
  static int vhost_vdpa_net_cvq_start(NetClientState *nc)
  {
  VhostVDPAState *s;
-int r;
+struct vhost_vdpa *v;
+uint64_t backend_features;
+int64_t cvq_group;
+int cvq_index, r;
  
  assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
  
  s = DO_UPCAST(VhostVDPAState, nc, nc);

+v = >vhost_vdpa;
+
+v->shadow_data = s->always_svq;
+v->shadow_vqs_enabled = s->always_svq;
+s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
+
+if (s->always_svq) {
+goto out;
+}
+
+/* Backend features are not available in v->dev yet. */
+r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, _features);
+if (unlikely(r < 0)) {
+error_report("Cannot get vdpa backend_features: %s(%d)",
+g_strerror(errno), errno);
+return -1;
+}
+if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) ||
+!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {



I think there should be some logic to block migration in this case?



+return 0;
+}
+
+/**
+ * Check if all the virtqueues of the virtio device are in a different vq
+ * than the last vq. VQ group of last group passed in cvq_group.
+ */
+cvq_index = v->dev->vq_index_end - 1;
+cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index);
+if (unlikely(cvq_group < 0)) {
+return cvq_group;
+}
+for (int i = 0; i < cvq_index; ++i) {
+   

Re: [PATCH RFC 2/3] hw/i2c: add mctp core

2022-11-16 Thread Klaus Jensen
On Nov 16 08:27, Corey Minyard wrote:
> On Wed, Nov 16, 2022 at 09:43:11AM +0100, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > Add an abstract MCTP over I2C endpoint model. This implements MCTP
> > control message handling as well as handling the actual I2C transport
> > (packetization).
> > 
> > Devices are intended to derive from this and implement the class
> > methods.
> > 
> > Parts of this implementation are inspired by code[1] previously posted by
> > Jonathan Cameron.
> 
> I have some comments inline, mostly about buffer handling.  Buffer
> handling is scary to me, so you might see some paranoia here :-).
> 

Totally understood :) Thanks for the review!

> > 
> >   [1]: 
> > https://lore.kernel.org/qemu-devel/20220520170128.4436-1-jonathan.came...@huawei.com/
> > 
> > Signed-off-by: Klaus Jensen 
> > ---
> >  hw/arm/Kconfig |   1 +
> >  hw/i2c/Kconfig |   4 +
> >  hw/i2c/mctp.c  | 365 +
> >  hw/i2c/meson.build |   1 +
> >  hw/i2c/trace-events|  12 ++
> >  include/hw/i2c/mctp.h  |  83 ++
> >  include/hw/misc/mctp.h |  43 +
> >  7 files changed, 509 insertions(+)
> >  create mode 100644 hw/i2c/mctp.c
> >  create mode 100644 include/hw/i2c/mctp.h
> >  create mode 100644 include/hw/misc/mctp.h
> > 
> > diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
> > index 17fcde8e1ccc..3233bdc193d7 100644
> > --- a/hw/arm/Kconfig
> > +++ b/hw/arm/Kconfig
> > @@ -444,6 +444,7 @@ config ASPEED_SOC
> >  select DS1338
> >  select FTGMAC100
> >  select I2C
> > +select MCTP_I2C
> >  select DPS310
> >  select PCA9552
> >  select SERIAL
> > diff --git a/hw/i2c/Kconfig b/hw/i2c/Kconfig
> > index 9bb8870517f8..5dd43d550c32 100644
> > --- a/hw/i2c/Kconfig
> > +++ b/hw/i2c/Kconfig
> > @@ -41,3 +41,7 @@ config PCA954X
> >  config PMBUS
> >  bool
> >  select SMBUS
> > +
> > +config MCTP_I2C
> > +bool
> > +select I2C
> > diff --git a/hw/i2c/mctp.c b/hw/i2c/mctp.c
> > new file mode 100644
> > index ..46376de95a98
> > --- /dev/null
> > +++ b/hw/i2c/mctp.c
> > @@ -0,0 +1,365 @@
> > +/*
> > + * SPDX-License-Identifier: GPL-2.0-or-later
> > + * SPDX-FileCopyrightText: Copyright (c) 2022 Samsung Electronics Co., Ltd.
> > + * SPDX-FileContributor: Klaus Jensen 
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "qemu/main-loop.h"
> > +
> > +#include "hw/qdev-properties.h"
> > +#include "hw/i2c/i2c.h"
> > +#include "hw/i2c/mctp.h"
> > +
> > +#include "trace.h"
> > +
> > +static uint8_t crc8(uint16_t data)
> > +{
> > +#define POLY (0x1070U << 3)
> > +int i;
> > +
> > +for (i = 0; i < 8; i++) {
> > +if (data & 0x8000) {
> > +data = data ^ POLY;
> > +}
> > +
> > +data = data << 1;
> > +}
> > +
> > +return (uint8_t)(data >> 8);
> > +#undef POLY
> > +}
> > +
> > +static uint8_t i2c_smbus_pec(uint8_t crc, uint8_t *buf, size_t len)
> > +{
> > +int i;
> > +
> > +for (i = 0; i < len; i++) {
> > +crc = crc8((crc ^ buf[i]) << 8);
> > +}
> > +
> > +return crc;
> > +}
> 
> The PEC calculation probably belongs in its own smbus.c file, since
> it's generic, so someone looking will find it.
> 

Makes sense. I'll move it.

> > +
> > +void i2c_mctp_schedule_send(MCTPI2CEndpoint *mctp)
> > +{
> > +I2CBus *i2c = I2C_BUS(qdev_get_parent_bus(DEVICE(mctp)));
> > +
> > +mctp->tx.state = I2C_MCTP_STATE_TX_START_SEND;
> > +
> > +i2c_bus_master(i2c, mctp->tx.bh);
> > +}
> > +
> > +static void i2c_mctp_tx(void *opaque)
> > +{
> > +DeviceState *dev = DEVICE(opaque);
> > +I2CBus *i2c = I2C_BUS(qdev_get_parent_bus(dev));
> > +I2CSlave *slave = I2C_SLAVE(dev);
> > +MCTPI2CEndpoint *mctp = MCTP_I2C_ENDPOINT(dev);
> > +MCTPI2CEndpointClass *mc = MCTP_I2C_ENDPOINT_GET_CLASS(mctp);
> > +MCTPI2CPacket *pkt = (MCTPI2CPacket *)mctp->buffer;
> > +uint8_t flags = 0;
> > +
> > +switch (mctp->tx.state) {
> > +case I2C_MCTP_STATE_TX_SEND_BYTE:
> > +if (mctp->pos < mctp->len) {
> > +uint8_t byte = mctp->buffer[mctp->pos];
> > +
> > +trace_i2c_mctp_tx_send_byte(mctp->pos, byte);
> > +
> > +/* send next byte */
> > +i2c_send_async(i2c, byte);
> > +
> > +mctp->pos++;
> > +
> > +break;
> > +}
> > +
> > +/* packet sent */
> > +i2c_end_transfer(i2c);
> > +
> > +/* fall through */
> > +
> > +case I2C_MCTP_STATE_TX_START_SEND:
> > +if (mctp->tx.is_control) {
> > +/* packet payload is already in buffer */
> > +flags |= MCTP_H_FLAGS_SOM | MCTP_H_FLAGS_EOM;
> > +} else {
> > +/* get message bytes from derived device */
> > +mctp->len = mc->get_message_bytes(mctp, pkt->mctp.payload,
> > +  I2C_MCTP_MAXMTU, );
> > +}
> > +
> > +if (!mctp->len) {
> > +trace_i2c_mctp_tx_done();

Re: [PATCH] target/ppc: Fix build warnings when building with 'disable-tcg'

2022-11-16 Thread Cédric Le Goater

On 11/17/22 03:13, Vaibhav Jain wrote:

Greg Kurz  writes:


Hi Vaibhav,


Hey Greg,

Good to see you,


Nice to see some people are still building QEMU at IBM ;-)

Yeah, and will hopefully continue to do this in future :-)



Reported-by: Kowshik Jois B S 
Signed-off-by: Vaibhav Jain 
---


Reviewed-by: Greg Kurz 


Thanks


This was introduced by a recent commit.

Fixes: 61bd1d29421a ("target/ppc: Convert to tcg_ops restore_state_to_opc")

Thanks again for pointing out the commit that caused this.




Vaibhav,

This is serious enough that it should get fixed in 7.2. Please file an
issue as explained in [1].

I have raised an issue on gitlab at
https://gitlab.com/qemu-project/qemu/-/issues/1319



And so,

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/377

Thanks,

C.




Re: [PATCH maybe-7.2 1/3] hw/i2c: only schedule pending master when bus is idle

2022-11-16 Thread Klaus Jensen
On Nov 16 16:58, Cédric Le Goater wrote:
> On 11/16/22 09:43, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > It is not given that the current master will release the bus after a
> > transfer ends. Only schedule a pending master if the bus is idle.
> > 
> > Fixes: 37fa5ca42623 ("hw/i2c: support multiple masters")
> > Signed-off-by: Klaus Jensen 
> > ---
> >   hw/i2c/aspeed_i2c.c  |  2 ++
> >   hw/i2c/core.c| 37 ++---
> >   include/hw/i2c/i2c.h |  2 ++
> >   3 files changed, 26 insertions(+), 15 deletions(-)
> > 
> > diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c
> > index c166fd20fa11..1f071a3811f7 100644
> > --- a/hw/i2c/aspeed_i2c.c
> > +++ b/hw/i2c/aspeed_i2c.c
> > @@ -550,6 +550,8 @@ static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus 
> > *bus, uint64_t value)
> >   }
> >   SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_STOP_CMD, 0);
> >   aspeed_i2c_set_state(bus, I2CD_IDLE);
> > +
> > +i2c_schedule_pending_master(bus->bus);
> 
> Shouldn't it be i2c_bus_release() ?
> 

The reason for having both i2c_bus_release() and
i2c_schedule_pending_master() is that i2c_bus_release() sort of pairs
with i2c_bus_master(). They either set or clear the bus->bh member.

In the current design, the controller (in this case the Aspeed I2C) is
an "implicit" master (it does not have a bottom half driving it), so
there is no bus->bh to clear.

I should (and will) write some documentation on the asynchronous API.


signature.asc
Description: PGP signature


Re: [PATCH v2 3/3] nvme: Add physical writes/reads from OCP log

2022-11-16 Thread Klaus Jensen
On Nov 16 17:19, Joel Granados wrote:
> On Tue, Nov 15, 2022 at 12:26:17PM +0100, Klaus Jensen wrote:
> > On Nov 14 14:50, Joel Granados wrote:
> > >  
> > > +static uint16_t nvme_vendor_specific_log(uint8_t lid, NvmeCtrl *n, 
> > > uint8_t rae,
> > > + uint32_t buf_len, uint64_t off,
> > > + NvmeRequest *req)
> > 
> > `NvmeCtrl *n` must be first parameter.
> Any reason why this is the case? I'll change it in my code, but would be
> nice to understand the reason.
> 

No other reason than consistency with existing code.


signature.asc
Description: PGP signature


Re: [PATCH for 8.0 v7 09/10] vdpa: Add shadow_data to vhost_vdpa

2022-11-16 Thread Jason Wang



在 2022/11/16 23:05, Eugenio Pérez 写道:

The memory listener that tells the device how to convert GPA to qemu's
va is registered against CVQ vhost_vdpa. memory listener translations
are always ASID 0, CVQ ones are ASID 1 if supported.

Let's tell the listener if it needs to register them on iova tree or
not.

Signed-off-by: Eugenio Pérez 



Acked-by: Jason Wang 

Thanks



---
v7: Rename listener_shadow_vq to shadow_data
v5: Solve conflict about vhost_iova_tree_remove accepting mem_region by
 value.
---
  include/hw/virtio/vhost-vdpa.h | 2 ++
  hw/virtio/vhost-vdpa.c | 6 +++---
  net/vhost-vdpa.c   | 1 +
  3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index e57dfa1fd1..45b969a311 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -40,6 +40,8 @@ typedef struct vhost_vdpa {
  struct vhost_vdpa_iova_range iova_range;
  uint64_t acked_features;
  bool shadow_vqs_enabled;
+/* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */
+bool shadow_data;
  /* IOVA mapping used by the Shadow Virtqueue */
  VhostIOVATree *iova_tree;
  GPtrArray *shadow_vqs;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 1e4e1cb523..852baf8b2c 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -224,7 +224,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener 
*listener,
   vaddr, section->readonly);
  
  llsize = int128_sub(llend, int128_make64(iova));

-if (v->shadow_vqs_enabled) {
+if (v->shadow_data) {
  int r;
  
  mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr,

@@ -251,7 +251,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener 
*listener,
  return;
  
  fail_map:

-if (v->shadow_vqs_enabled) {
+if (v->shadow_data) {
  vhost_iova_tree_remove(v->iova_tree, mem_region);
  }
  
@@ -296,7 +296,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
  
  llsize = int128_sub(llend, int128_make64(iova));
  
-if (v->shadow_vqs_enabled) {

+if (v->shadow_data) {
  const DMAMap *result;
  const void *vaddr = memory_region_get_ram_ptr(section->mr) +
  section->offset_within_region +
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 5185ac7042..a9c864741a 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -570,6 +570,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
  s->vhost_vdpa.index = queue_pair_index;
  s->always_svq = svq;
  s->vhost_vdpa.shadow_vqs_enabled = svq;
+s->vhost_vdpa.shadow_data = svq;
  s->vhost_vdpa.iova_tree = iova_tree;
  if (!is_datapath) {
  s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),





Re: [PATCH for 8.0 v7 08/10] vdpa: Store x-svq parameter in VhostVDPAState

2022-11-16 Thread Jason Wang



在 2022/11/16 23:05, Eugenio Pérez 写道:

CVQ can be shadowed two ways:
- Device has x-svq=on parameter (current way)
- The device can isolate CVQ in its own vq group

QEMU needs to check for the second condition dynamically, because CVQ
index is not known at initialization time. Since this is dynamic, the
CVQ isolation could vary with different conditions, making it possible
to go from "not isolated group" to "isolated".

Saving the cmdline parameter in an extra field so we never disable CVQ
SVQ in case the device was started with cmdline.

Signed-off-by: Eugenio Pérez 



Acked-by: Jason Wang 

Thanks



---
  net/vhost-vdpa.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 89b01fcaec..5185ac7042 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -38,6 +38,8 @@ typedef struct VhostVDPAState {
  void *cvq_cmd_out_buffer;
  virtio_net_ctrl_ack *status;
  
+/* The device always have SVQ enabled */

+bool always_svq;
  bool started;
  } VhostVDPAState;
  
@@ -566,6 +568,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
  
  s->vhost_vdpa.device_fd = vdpa_device_fd;

  s->vhost_vdpa.index = queue_pair_index;
+s->always_svq = svq;
  s->vhost_vdpa.shadow_vqs_enabled = svq;
  s->vhost_vdpa.iova_tree = iova_tree;
  if (!is_datapath) {





Re: [PATCH for 8.0 v7 07/10] vdpa: Add asid parameter to vhost_vdpa_dma_map/unmap

2022-11-16 Thread Jason Wang
On Wed, Nov 16, 2022 at 11:06 PM Eugenio Pérez  wrote:
>
> So the caller can choose which ASID is destined.
>
> No need to update the batch functions as they will always be called from
> memory listener updates at the moment. Memory listener updates will
> always update ASID 0, as it's the passthrough ASID.
>
> All vhost devices' ASIDs are 0 at this moment.
>
> Signed-off-by: Eugenio Pérez 

Acked-by: Jason Wang 

Thanks

> ---
> v7:
> * Move comment on zero initialization of vhost_vdpa_dma_map above the
>   functions.
> * Add VHOST_VDPA_GUEST_PA_ASID macro.
>
> v5:
> * Solve conflict, now vhost_vdpa_svq_unmap_ring returns void
> * Change comment on zero initialization.
>
> v4: Add comment specifying behavior if device does not support _F_ASID
>
> v3: Deleted unneeded space
> ---
>  include/hw/virtio/vhost-vdpa.h | 14 ++---
>  hw/virtio/vhost-vdpa.c | 36 +++---
>  net/vhost-vdpa.c   |  6 +++---
>  hw/virtio/trace-events |  4 ++--
>  4 files changed, 41 insertions(+), 19 deletions(-)
>
> diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> index d85643..e57dfa1fd1 100644
> --- a/include/hw/virtio/vhost-vdpa.h
> +++ b/include/hw/virtio/vhost-vdpa.h
> @@ -19,6 +19,12 @@
>  #include "hw/virtio/virtio.h"
>  #include "standard-headers/linux/vhost_types.h"
>
> +/*
> + * ASID dedicated to map guest's addresses.  If SVQ is disabled it maps GPA 
> to
> + * qemu's IOVA.  If SVQ is enabled it maps also the SVQ vring here
> + */
> +#define VHOST_VDPA_GUEST_PA_ASID 0
> +
>  typedef struct VhostVDPAHostNotifier {
>  MemoryRegion mr;
>  void *addr;
> @@ -29,6 +35,7 @@ typedef struct vhost_vdpa {
>  int index;
>  uint32_t msg_type;
>  bool iotlb_batch_begin_sent;
> +uint32_t address_space_id;
>  MemoryListener listener;
>  struct vhost_vdpa_iova_range iova_range;
>  uint64_t acked_features;
> @@ -42,8 +49,9 @@ typedef struct vhost_vdpa {
>  VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
>  } VhostVDPA;
>
> -int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
> -   void *vaddr, bool readonly);
> -int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size);
> +int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
> +   hwaddr size, void *vaddr, bool readonly);
> +int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
> + hwaddr size);
>
>  #endif
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 23efb8f49d..1e4e1cb523 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -72,22 +72,28 @@ static bool 
> vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
>  return false;
>  }
>
> -int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
> -   void *vaddr, bool readonly)
> +/*
> + * The caller must set asid = 0 if the device does not support asid.
> + * This is not an ABI break since it is set to 0 by the initializer anyway.
> + */
> +int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
> +   hwaddr size, void *vaddr, bool readonly)
>  {
>  struct vhost_msg_v2 msg = {};
>  int fd = v->device_fd;
>  int ret = 0;
>
>  msg.type = v->msg_type;
> +msg.asid = asid;
>  msg.iotlb.iova = iova;
>  msg.iotlb.size = size;
>  msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
>  msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
>  msg.iotlb.type = VHOST_IOTLB_UPDATE;
>
> -   trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size,
> -msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type);
> +trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.asid, msg.iotlb.iova,
> + msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm,
> + msg.iotlb.type);
>
>  if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
>  error_report("failed to write, fd=%d, errno=%d (%s)",
> @@ -98,18 +104,24 @@ int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr 
> iova, hwaddr size,
>  return ret;
>  }
>
> -int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size)
> +/*
> + * The caller must set asid = 0 if the device does not support asid.
> + * This is not an ABI break since it is set to 0 by the initializer anyway.
> + */
> +int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
> + hwaddr size)
>  {
>  struct vhost_msg_v2 msg = {};
>  int fd = v->device_fd;
>  int ret = 0;
>
>  msg.type = v->msg_type;
> +msg.asid = asid;
>  msg.iotlb.iova = iova;
>  msg.iotlb.size = size;
>  msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
>
> -trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova,
> +trace_vhost_vdpa_dma_unmap(v, fd, msg.type, 

Re: [PATCH for 8.0 v7 06/10] vdpa: Allocate SVQ unconditionally

2022-11-16 Thread Jason Wang
On Wed, Nov 16, 2022 at 11:06 PM Eugenio Pérez  wrote:
>
> SVQ may run or not in a device depending on runtime conditions (for
> example, if the device can move CVQ to its own group or not).
>
> Allocate the SVQ array unconditionally at startup, since it's hard to
> move this allocation elsewhere.
>
> Signed-off-by: Eugenio Pérez 

Acked-by: Jason Wang 

Thanks

> ---
>  hw/virtio/vhost-vdpa.c | 4 
>  1 file changed, 4 deletions(-)
>
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 146f0dcb40..23efb8f49d 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -547,10 +547,6 @@ static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
>  struct vhost_vdpa *v = dev->opaque;
>  size_t idx;
>
> -if (!v->shadow_vqs) {
> -return;
> -}
> -
>  for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
>  vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
>  }
> --
> 2.31.1
>




Re: [PATCH for 8.0 v7 05/10] vdpa: move SVQ vring features check to net/

2022-11-16 Thread Jason Wang
On Wed, Nov 16, 2022 at 11:06 PM Eugenio Pérez  wrote:
>
> The next patches will start control SVQ if possible. However, we don't
> know if that will be possible at qemu boot anymore.
>
> Since the moved checks will be already evaluated at net/ to know if it
> is ok to shadow CVQ, move them.
>
> Signed-off-by: Eugenio Pérez 

Acked-by: Jason Wang 

Thanks

> ---
>  hw/virtio/vhost-vdpa.c | 33 ++---
>  net/vhost-vdpa.c   |  3 ++-
>  2 files changed, 4 insertions(+), 32 deletions(-)
>
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 3df2775760..146f0dcb40 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -402,29 +402,9 @@ static int vhost_vdpa_get_dev_features(struct vhost_dev 
> *dev,
>  return ret;
>  }
>
> -static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
> -   Error **errp)
> +static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v)
>  {
>  g_autoptr(GPtrArray) shadow_vqs = NULL;
> -uint64_t dev_features, svq_features;
> -int r;
> -bool ok;
> -
> -if (!v->shadow_vqs_enabled) {
> -return 0;
> -}
> -
> -r = vhost_vdpa_get_dev_features(hdev, &dev_features);
> -if (r != 0) {
> -error_setg_errno(errp, -r, "Can't get vdpa device features");
> -return r;
> -}
> -
> -svq_features = dev_features;
> -ok = vhost_svq_valid_features(svq_features, errp);
> -if (unlikely(!ok)) {
> -return -1;
> -}
>
>  shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
>  for (unsigned n = 0; n < hdev->nvqs; ++n) {
> @@ -436,7 +416,6 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
> struct vhost_vdpa *v,
>  }
>
>  v->shadow_vqs = g_steal_pointer(&shadow_vqs);
> -return 0;
>  }
>
>  static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
> @@ -461,11 +440,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void 
> *opaque, Error **errp)
>  dev->opaque =  opaque ;
>  v->listener = vhost_vdpa_memory_listener;
>  v->msg_type = VHOST_IOTLB_MSG_V2;
> -ret = vhost_vdpa_init_svq(dev, v, errp);
> -if (ret) {
> -goto err;
> -}
> -
> +vhost_vdpa_init_svq(dev, v);
>  vhost_vdpa_get_iova_range(v);
>
>  if (!vhost_vdpa_first_dev(dev)) {
> @@ -476,10 +451,6 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void 
> *opaque, Error **errp)
> VIRTIO_CONFIG_S_DRIVER);
>
>  return 0;
> -
> -err:
> -ram_block_discard_disable(false);
> -return ret;
>  }
>
>  static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index e98d5f5eac..dd9cea42d0 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -117,9 +117,10 @@ static bool vhost_vdpa_net_valid_svq_features(uint64_t 
> features, Error **errp)
>  if (invalid_dev_features) {
>  error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
> invalid_dev_features);
> +return false;
>  }
>
> -return !invalid_dev_features;
> +return vhost_svq_valid_features(features, errp);
>  }
>
>  static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
> --
> 2.31.1
>




Re: [PATCH for 8.0 v7 01/10] vdpa: Use v->shadow_vqs_enabled in vhost_vdpa_svqs_start & stop

2022-11-16 Thread Jason Wang
On Wed, Nov 16, 2022 at 11:06 PM Eugenio Pérez  wrote:
>
> This function used to trust in v->shadow_vqs != NULL to know if it must
> start svq or not.
>
> This is not going to be valid anymore, as qemu is going to allocate svq
> unconditionally (but it will only start them conditionally).
>
> Signed-off-by: Eugenio Pérez 

Acked-by: Jason Wang 

Thanks

> ---
>  hw/virtio/vhost-vdpa.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 7468e44b87..7f0ff4df5b 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -1029,7 +1029,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
>  Error *err = NULL;
>  unsigned i;
>
> -if (!v->shadow_vqs) {
> +if (!v->shadow_vqs_enabled) {
>  return true;
>  }
>
> @@ -1082,7 +1082,7 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev)
>  {
>  struct vhost_vdpa *v = dev->opaque;
>
> -if (!v->shadow_vqs) {
> +if (!v->shadow_vqs_enabled) {
>  return;
>  }
>
> --
> 2.31.1
>




[PATCH v5] acpi/tests/avocado/bits: some misc fixes

2022-11-16 Thread Ani Sinha
Most of the changes are trivial. The bits test timeout has now been increased
to 200 seconds in order to accommodate slower systems and fewer unnecessary
failures. Removed of the reference to non-existent README file in docs. Some
minor corrections in the doc file.

CC: Thomas Huth 
CC: Michael S. Tsirkin 
CC: qemu-triv...@nongnu.org
Signed-off-by: Ani Sinha 
Reviewed-by: Thomas Huth 
---
 docs/devel/acpi-bits.rst   | 12 
 tests/avocado/acpi-bits.py |  3 +++
 2 files changed, 7 insertions(+), 8 deletions(-)

changes from v1: address Thomas' suggestions.
changes from v2: more minor corrections in doc, tags added.
changes from v3: raised timeout to 200 secs overriding the default
avocado timeout of 120 secs.
changes from v4: rebased to adjust for changes from john's patch 
https://www.mail-archive.com/qemu-devel@nongnu.org/msg921975.html

diff --git a/docs/devel/acpi-bits.rst b/docs/devel/acpi-bits.rst
index c9564d871a..56e76338c3 100644
--- a/docs/devel/acpi-bits.rst
+++ b/docs/devel/acpi-bits.rst
@@ -16,11 +16,8 @@ end user. The other is that we have more control of what we 
wanted to test
 and how by directly using acpica interpreter on top of the bios on a running
 system. More details on the inspiration for developing biosbits and its real
 life uses can be found in [#a]_ and [#b]_.
-This directory contains tests written in python using avocado framework that
-exercises the QEMU bios components using biosbits and reports test failures.
 For QEMU, we maintain a fork of bios bits in gitlab along with all the
-dependent submodules:
-https://gitlab.com/qemu-project/biosbits-bits
+dependent submodules here: https://gitlab.com/qemu-project/biosbits-bits
 This fork contains numerous fixes, a newer acpica and changes specific to
 running this avocado QEMU tests using bits. The author of this document
 is the sole maintainer of the QEMU fork of bios bits repo.
@@ -38,10 +35,9 @@ Under ``tests/avocado/`` as the root we have:
│ ├── bits-config
│ │ └── bits-cfg.txt
│ ├── bits-tests
-   │ │ ├── smbios.py2
-   │ │ ├── testacpi.py2
-   │ │ └── testcpuid.py2
-   │ └── README
+   │   ├── smbios.py2
+   │   ├── testacpi.py2
+   │   └── testcpuid.py2
├── acpi-bits.py
 
 * ``tests/avocado``:
diff --git a/tests/avocado/acpi-bits.py b/tests/avocado/acpi-bits.py
index dd1f238ae2..ec2261893d 100644
--- a/tests/avocado/acpi-bits.py
+++ b/tests/avocado/acpi-bits.py
@@ -134,6 +134,9 @@ class AcpiBitsTest(QemuBaseTest): #pylint: 
disable=too-many-instance-attributes
 :avocado: tags=acpi
 
 """
+# in slower systems the test can take as long as 3 minutes to complete.
+timeout = 200
+
 def __init__(self, *args, **kwargs):
 super().__init__(*args, **kwargs)
 self._vm = None
-- 
2.34.1




Re: [PATCH 2/2] Do not access /dev/mem in MSI-X PCI passthrough on Xen

2022-11-16 Thread Marek Marczykowski-Górecki
On Wed, Nov 16, 2022 at 10:40:02PM +0100, Marek Marczykowski-Górecki wrote:
> On Wed, Nov 16, 2022 at 02:15:22PM -0500, Jason Andryuk wrote:
> > On Mon, Nov 14, 2022 at 2:21 PM Marek Marczykowski-Górecki
> >  wrote:
> > >
> > > The /dev/mem is used for two purposes:
> > >  - reading PCI_MSIX_ENTRY_CTRL_MASKBIT
> > >  - reading Pending Bit Array (PBA)
> > >
> > > The first one was originally done because Xen did not send all
> > > vector ctrl writes to the device model, so QEMU might have an outdated
> > > register value. This has been changed in Xen, so QEMU can now use its
> > > cached value of the register instead.
> > >
> > > The Pending Bit Array (PBA) handling is for the case where it lives on
> > > the same page as the MSI-X table itself. Xen has been extended to handle
> > > this case too (as well as other registers that may live on those pages),
> > > so QEMU handling is not necessary anymore.
> > >
> > > Removing /dev/mem access is useful to work within stubdomain, and
> > > necessary when dom0 kernel runs in lockdown mode.
> > >
> > > Signed-off-by: Marek Marczykowski-Górecki 
> > > 
> > 
> > I put the Xen, QEMU, and xen-pciback patches into OpenXT and gave a
> > little test.  When pci_permissive=0, iwlwifi fails to load its
> > firmware.  With pci_permissive=1, it looks like MSI-X is enabled. (I
> > previously included your libxl allow_interrupt_control patch - that
> > seemed to get regular MSIs working prior to the MSI-X patches.)  I
> > also removed the OpenXT equivalent of 0005-Disable-MSI-X-caps.patch.
> > I am testing with Linux 5.4.y, so that could be another factor.
> 
> Can you confirm the allow_interrupt_control is set by libxl? Also,
> vanilla 5.4 doesn't have the allow_interrupt_control patch at all, and you
> may have an earlier version that had "allow_msi_enable" as the sysfs
> file name.

Ok, I found what is wrong. Enabling MSI-X is refused, because INTx isn't
disabled at this point yet. And apparently I was testing this with
permissive=1...

Linux does this:
https://github.com/torvalds/linux/blob/master/drivers/pci/msi/msi.c#L611
In short:
1. Enable MSI-X with MASKALL=1
2. Setup MSI-X table
3. Disable INTx
4. Set MASKALL=0

This patch on top should fix this:
8<
diff --git a/drivers/xen/xen-pciback/conf_space_capability.c 
b/drivers/xen/xen-pciback/conf_space_capability.c
index 097316a74126..f4c4381de76e 100644
--- a/drivers/xen/xen-pciback/conf_space_capability.c
+++ b/drivers/xen/xen-pciback/conf_space_capability.c
@@ -235,7 +235,7 @@ static int msi_msix_flags_write(struct pci_dev *dev, int 
offset, u16 new_value,
(new_value ^ old_value) & ~field_config->allowed_bits)
return PCIBIOS_SET_FAILED;
 
-   if (new_value & field_config->enable_bit) {
+   if ((new_value & field_config->allowed_bits) == 
field_config->enable_bit) {
/* don't allow enabling together with other interrupt types */
int int_type = xen_pcibk_get_interrupt_type(dev);
 
8<

Jan, is the above safe? It should prevent clearing MASKALL if INTx isn't
disabled, unless I missed something? But also, it will allow enabling
MSI-X with MASKALL=1 together with MSI, which I'm not sure if should be
allowed.
Alternatively to the above patch, I could allow specifically setting
MSIX_FLAGS_ENABLE + MSIX_FLAGS_MASKALL while INTx isn't disabled as a
single exception, but at this point I'm not sure if some other driver or
OS wouldn't approach this in yet another way.

-- 
Best Regards,
Marek Marczykowski-Górecki
Invisible Things Lab


signature.asc
Description: PGP signature


Re: [PATCH] target/arm: Limit LPA2 effective output address when TCR.DS == 0

2022-11-16 Thread Richard Henderson

On 11/16/22 09:03, Ard Biesheuvel wrote:

diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index 3745ac9723474332..9a6277d862fac229 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -1222,6 +1222,14 @@ static bool get_phys_addr_lpae(CPUARMState *env, 
S1Translate *ptw,
  ps = MIN(ps, param.ps);
  assert(ps < ARRAY_SIZE(pamax_map));
  outputsize = pamax_map[ps];
+
+/*
+ * With LPA2, the effective output address (OA) size is at most 48 bits
+ * unless TCR.DS == 1
+ */
+if (!param.ds && param.gran != Gran64K) {
+outputsize = MIN(outputsize, 48);
+}


Reviewed-by: Richard Henderson 

I thought about moving this back into aa64_va_parameters, similar to how we bound tsz, but 
since this is the only use of param.ps, this placement is as good as any.



r~






RE: [PATCH for-7.2] rtl8139: honor large send MSS value

2022-11-16 Thread Tobias Fiebig
Heho,
Ok, I just learned more C than I ever wanted to. There is a bit more amiss here 
(ll from 7d7238c72b983cff5064734349d2d45be9c6282c):

In line 1916 of rtl8139.c we set txdw0; If we calculate the MSS at this point, 
it is consistently 12 below requested, but generally accurate. The bits that 
flip re: -12 must happen somewhere in the Linux kernel driver (ll 764 in 
drivers/net/ethernet/realtek/8139cp.c?); Didn't look there in-depth yet (and do 
not plan to, maybe one of you has more experience with this?) Given the 
consistency of this deviation, maybe just doing a +12 might be more straight 
forward.

However, in ll2030ff we reset a couple of status indicators. These overlap with 
the fields for the MSS, leading to inaccurate values being calculated later on; 
For example, requesting an MSS of 767 leads to an MSS of 3 being calculated by 
your patch; Similarly, requesting 1000 leads to 268. At least for the latter I 
see packets of that size being generated on the wire (which should also not 
happen, as the MSS should never be below 536; maybe a check could help here to 
make sure we are not trusting arbitrary values from the driver, esp. given the 
bobble of sec issues around PMTUD/MSS; Technically, now that MSS is defined 
earlier, we could also move this closer to the start of TSO large frame 
handling).

Below is also a draft patch following my suggestions (save txdw0, +12, check 
for <536) and some examples for what I described above, which I ran on your 
last patch. Please note again that this is essentially the first time I do 
anything in C; Also, I wasn't sure what has less perf impact (save the whole 
32bit of txdw0 even though it might not be needed vs. also doing the shift/& 
even though it might not be needed).

Apart from that, my patch seems to work, and the MSS gets set correctly; 
Someone else testing would be nice, though:

# MSS_requested=1320
RTL8139: +++ C+ mode offloaded task TSO IP data 2648 frame data 2668 specified 
MSS=1320

# MSS_requested=1000
RTL8139: +++ C+ mode offloaded task TSO IP data 2008 frame data 2028 specified 
MSS=1000

# MSS_requested=600
RTL8139: +++ C+ mode offloaded task TSO IP data 1796 frame data 1816 specified 
MSS=600

With best regards,
Tobias

diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index e6643e3c9d..59321460b9 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -77,7 +77,6 @@
 ( ( input ) & ( size - 1 )  )
 
 #define ETHER_TYPE_LEN 2
-#define ETH_MTU 1500
 
 #define VLAN_TCI_LEN 2
 #define VLAN_HLEN (ETHER_TYPE_LEN + VLAN_TCI_LEN)
@@ -1934,8 +1933,9 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
 #define CP_TX_LS (1<<28)
 /* large send packet flag */
 #define CP_TX_LGSEN (1<<27)
-/* large send MSS mask, bits 16...25 */
-#define CP_TC_LGSEN_MSS_MASK ((1 << 12) - 1)
+/* large send MSS mask, bits 16...26 */
+#define CP_TC_LGSEN_MSS_SHIFT 16
+#define CP_TC_LGSEN_MSS_MASK ((1 << 11) - 1)
 
 /* IP checksum offload flag */
 #define CP_TX_IPCS (1<<18)
@@ -2027,6 +2027,9 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
 s->currCPlusTxDesc = 0;
 }
 
+/* store unaltered txdw0 for later use in MSS calculation*/
+uint32_t txdw0_save = txdw0;
+
 /* transfer ownership to target */
 txdw0 &= ~CP_TX_OWN;
 
@@ -2149,10 +2152,12 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
 goto skip_offload;
 }
 
-int large_send_mss = (txdw0 >> 16) & CP_TC_LGSEN_MSS_MASK;
+/* set large_send_mss from txdw0 before overlapping mss fields 
were cleared */
+int large_send_mss = ((txdw0_save >> CP_TC_LGSEN_MSS_SHIFT) &
+CP_TC_LGSEN_MSS_MASK) + 12;
 
-DPRINTF("+++ C+ mode offloaded task TSO MTU=%d IP data %d "
-"frame data %d specified MSS=%d\n", ETH_MTU,
+DPRINTF("+++ C+ mode offloaded task TSO IP data %d "
+"frame data %d specified MSS=%d\n",
 ip_data_len, saved_size - ETH_HLEN, large_send_mss);
 
 int tcp_send_offset = 0;
@@ -2177,9 +2182,13 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
 goto skip_offload;
 }
 
-/* ETH_MTU = ip header len + tcp header len + payload */
+/* MSS too small? Min MSS = 536 */
+if (tcp_hlen + hlen >= large_send_mss || 535 >= 
large_send_mss) {
+goto skip_offload;
+}
+
 int tcp_data_len = ip_data_len - tcp_hlen;
-int tcp_chunk_size = ETH_MTU - hlen - tcp_hlen;
+int tcp_chunk_size = large_send_mss - hlen - tcp_hlen;
 
 DPRINTF("+++ C+ mode TSO IP data len %d TCP hlen %d TCP "
 "data len %d TCP chunk size %d\n", ip_data_len,



Some examples (with additional DPRINT capturing txdw0/MSS at various places; 
txdw0_0=ll1923, txdw0_4=ll2029, txdw0_5=ll2039, 

Re: [PATCH for-8.0 1/1] target/ppc: Use tcg_gen_atomic_cmpxchg_i128 for STQCX

2022-11-16 Thread Richard Henderson
I think you missed the Based-on tag.

r~

On Wed, 16 Nov 2022, 05:38 Daniel Henrique Barboza, 
wrote:

> Richard,
>
> I believe the ppc64-linux-user target didn't like what you did in this
> patch. Here's the error:
>
> $ ../configure
> --target-list=ppc64-softmmu,ppc64-linux-user,ppc-softmmu,ppc-linux-user,ppc64le-linux-user
> $ make -j
>
> (...)
>
> [15/133] Compiling C object
> libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o
> FAILED: libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o
> cc -m64 -mcx16 -Ilibqemu-ppc64-linux-user.fa.p -I. -I.. -Itarget/ppc
> -I../target/ppc -I../common-user/host/x86_64
> -I../linux-user/include/host/x86_64 -I../linux-user/include -Ilinux-user
> -I../linux-user -Ilinux-user/ppc -I../linux-user/ppc -Iqapi -Itrace -Iui
> -Iui/shader -I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include
> -I/usr/include/sysprof-4 -fdiagnostics-color=auto -Wall -Winvalid-pch
> -Werror -std=gnu11 -O2 -g -isystem
> /home/danielhb/kvm-project/qemu/linux-headers -isystem linux-headers
> -iquote . -iquote /home/danielhb/kvm-project/qemu -iquote
> /home/danielhb/kvm-project/qemu/include -iquote
> /home/danielhb/kvm-project/qemu/tcg/i386 -pthread -U_FORTIFY_SOURCE
> -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64
> -D_LARGEFILE_SOURCE -Wstrict-prototypes -Wredundant-decls -Wundef
> -Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fno-common
> -fwrapv -Wold-style-declaration -Wold-style-definition -Wtype-limits
> -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers
> -Wempty-body -Wnested-externs -Wendif-labels -Wexpansion-to-defined
> -Wimplicit-fallthrough=2 -Wno-missing-include-dirs
> -Wno-shift-negative-value -Wno-psabi -fstack-protector-strong -fPIE
> -isystem../linux-headers -isystemlinux-headers -DNEED_CPU_H
> '-DCONFIG_TARGET="ppc64-linux-user-config-target.h"'
> '-DCONFIG_DEVICES="ppc64-linux-user-config-devices.h"' -MD -MQ
> libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o -MF
> libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o.d -o
> libqemu-ppc64-linux-user.fa.p/target_ppc_translate.c.o -c
> ../target/ppc/translate.c
> ../target/ppc/translate.c: In function ‘gen_stqcx_’:
> ../target/ppc/translate.c:3989:5: error: unknown type name ‘TCGv_i128’;
> did you mean ‘TCGv_i32’?
>   3989 | TCGv_i128 cmp, val;
>| ^
>| TCGv_i32
> ../target/ppc/translate.c:4006:11: error: implicit declaration of function
> ‘tcg_temp_new_i128’; did you mean ‘tcg_temp_new_i32’?
> [-Werror=implicit-function-declaration]
>   4006 | cmp = tcg_temp_new_i128();
>|   ^
>|   tcg_temp_new_i32
> ../target/ppc/translate.c:4006:11: error: nested extern declaration of
> ‘tcg_temp_new_i128’ [-Werror=nested-externs]
> ../target/ppc/translate.c:4009:5: error: implicit declaration of function
> ‘tcg_gen_concat_i64_i128’; did you mean ‘tcg_gen_concat_i32_i64’?
> [-Werror=implicit-function-declaration]
>   4009 | tcg_gen_concat_i64_i128(cmp, cpu_reserve_val2,
> cpu_reserve_val);
>| ^~~
>| tcg_gen_concat_i32_i64
> ../target/ppc/translate.c:4009:5: error: nested extern declaration of
> ‘tcg_gen_concat_i64_i128’ [-Werror=nested-externs]
> ../target/ppc/translate.c:4014:5: error: implicit declaration of function
> ‘tcg_gen_atomic_cmpxchg_i128’; did you mean ‘tcg_gen_atomic_cmpxchg_i32’?
> [-Werror=implicit-function-declaration]
>   4014 | tcg_gen_atomic_cmpxchg_i128(val, cpu_reserve, cmp, val,
> ctx->mem_idx,
>| ^~~
>| tcg_gen_atomic_cmpxchg_i32
> ../target/ppc/translate.c:4014:5: error: nested extern declaration of
> ‘tcg_gen_atomic_cmpxchg_i128’ [-Werror=nested-externs]
> ../target/ppc/translate.c:4016:5: error: implicit declaration of function
> ‘tcg_temp_free_i128’; did you mean ‘tcg_temp_free_i32’?
> [-Werror=implicit-function-declaration]
>   4016 | tcg_temp_free_i128(cmp);
>| ^~
>| tcg_temp_free_i32
> ../target/ppc/translate.c:4016:5: error: nested extern declaration of
> ‘tcg_temp_free_i128’ [-Werror=nested-externs]
> ../target/ppc/translate.c:4020:5: error: implicit declaration of function
> ‘tcg_gen_extr_i128_i64’; did you mean ‘tcg_gen_ext_i32_i64’?
> [-Werror=implicit-function-declaration]
>   4020 | tcg_gen_extr_i128_i64(t1, t0, val);
>| ^
>| tcg_gen_ext_i32_i64
> ../target/ppc/translate.c:4020:5: error: nested extern declaration of
> ‘tcg_gen_extr_i128_i64’ [-Werror=nested-externs]
> cc1: all warnings being treated as errors
> [16/133] Compiling C object
> libqemu-ppc64-softmmu.fa.p/target_ppc_mmu_helper.c.o
> [17/133] Compiling C object
> libqemu-ppc64-softmmu.fa.p/target_ppc_translate.c.o
> FAILED: libqemu-ppc64-softmmu.fa.p/target_ppc_translate.c.o
>
>
> Thanks,
>
>
> Daniel
>
>
> On 11/12/22 03:11, Richard Henderson wrote:
> > Note that the previous direct reference to reserve_val,
> >
> > -   

Re: [PATCH-for-7.2] target/ppc: Fix build warnings when building with 'disable-tcg'

2022-11-16 Thread Vaibhav Jain


Hi Philippe,

Philippe Mathieu-Daudé  writes:
>
> Oops sorry.
>
> Fixes: 61bd1d2942 ("target/ppc: Convert to tcg_ops restore_state_to_opc")
Thanks for figuring and pointing this out

>
>> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
>> index a05a2ed595..94adcb766b 100644
>> --- a/target/ppc/excp_helper.c
>> +++ b/target/ppc/excp_helper.c
>> @@ -2842,6 +2842,7 @@ void helper_td(CPUPPCState *env, target_ulong arg1, 
>> target_ulong arg2,
>>   #endif
>>   #endif
>>   
>> +#ifdef CONFIG_TCG
>>   static uint32_t helper_SIMON_LIKE_32_64(uint32_t x, uint64_t key, uint32_t 
>> lane)
>>   {
>>   const uint16_t c = 0xfffc;
>> @@ -2924,6 +2925,7 @@ HELPER_HASH(HASHST, env->spr[SPR_HASHKEYR], true)
>>   HELPER_HASH(HASHCHK, env->spr[SPR_HASHKEYR], false)
>>   HELPER_HASH(HASHSTP, env->spr[SPR_HASHPKEYR], true)
>>   HELPER_HASH(HASHCHKP, env->spr[SPR_HASHPKEYR], false)
>> +#endif /* CONFIG_TCG */
>
> Fixes: 670f1da374 ("target/ppc: Implement hashst and hashchk")
>
Thanks

> Hmm this is another fix... You could split your patch in 2,
> but not a big deal. Regardless:

This being a trivial patch, decided against splitting it in 2.


> Reviewed-by: Philippe Mathieu-Daudé 
Thanks,


-- 
Cheers
~ Vaibhav



Re: [PATCH] target/ppc: Fix build warnings when building with 'disable-tcg'

2022-11-16 Thread Vaibhav Jain
Greg Kurz  writes:

> Hi Vaibhav,
>
Hey Greg,

Good to see you,

> Nice to see some people are still building QEMU at IBM ;-)
Yeah, and will hopefully continue to do this in future :-)


>> Reported-by: Kowshik Jois B S 
>> Signed-off-by: Vaibhav Jain 
>> ---
>
> Reviewed-by: Greg Kurz 
>
Thanks

> This was introduced by a recent commit.
>
> Fixes: 61bd1d29421a ("target/ppc: Convert to tcg_ops restore_state_to_opc")
Thanks again for pointing out the commit that caused this.

>
>
> Vaibhav,
>
> This is serious enough it should get fixed in 7.2. Please file an
> issue as explained in [1].
I have raised an issue on gitlab at
https://gitlab.com/qemu-project/qemu/-/issues/1319


-- 
Cheers
~ Vaibhav



Re: [PATCH] tests/avocado: configure acpi-bits to use avocado timeout

2022-11-16 Thread Ani Sinha
On Wed, Nov 16, 2022 at 11:31 PM John Snow  wrote:

>
>
> On Tue, Nov 15, 2022, 10:24 PM Ani Sinha  wrote:
>
>> On Wed, Nov 16, 2022 at 2:58 AM John Snow  wrote:
>> >
>> > Instead of using a hardcoded timeout, just rely on Avocado's built-in
>> > test case timeout. This helps avoid timeout issues on machines where 60
>> > seconds is not sufficient.
>> >
>> > Signed-off-by: John Snow 
>>
>> Reviewed-by: Ani Sinha 
>>
>
> Alex's critique is valid, though: the way vm.wait() works is to
> immediately  terminate the serial console connection as it prepares for the
> VM to shut down. I forgot about this.
>
> (For historical reasons, it does this to avoid deadlocks when the pipe
> fills.)
>
> I think we definitely do want to make sure we watch the console *while* we
> wait for it to shut down, which is not a feature QEMUMachine really offers
> right now in a meaningful way.
>

Maybe we can push your current patch while we consider these console
logging enhancements for the next release window. Console logging would
require some changes in bits and some more testing. I'm not sure if I'll
have time for it immediately at present.


> I need to make some more drastic changes to machine.py, but in the
> meantime I can revise this patch to do something a bit smarter so we get
> console logging while we wait. This is a use case worth supporting.
>
> (Thanks for writing new and interesting tests!)
>
>
>> > ---
>> >  tests/avocado/acpi-bits.py | 10 ++
>> >  1 file changed, 2 insertions(+), 8 deletions(-)
>> >
>> > diff --git a/tests/avocado/acpi-bits.py b/tests/avocado/acpi-bits.py
>> > index 8745a58a766..ac13e22dc93 100644
>> > --- a/tests/avocado/acpi-bits.py
>> > +++ b/tests/avocado/acpi-bits.py
>> > @@ -385,12 +385,6 @@ def test_acpi_smbios_bits(self):
>> >  self._vm.launch()
>> >  # biosbits has been configured to run all the specified test
>> suites
>> >  # in batch mode and then automatically initiate a vm shutdown.
>> > -# sleep for maximum of one minute
>> > -max_sleep_time = time.monotonic() + 60
>> > -while self._vm.is_running() and time.monotonic() <
>> max_sleep_time:
>> > -time.sleep(1)
>> > -
>> > -self.assertFalse(time.monotonic() > max_sleep_time,
>> > - 'The VM seems to have failed to shutdown in
>> time')
>> > -
>> > +# Rely on avocado's unit test timeout.
>> > +self._vm.wait(timeout=None)
>>
>> I think this is fine. This just waits until the VM is shutdown on its
>> own and relies on the avocado framework to timeout if it doesn't. We
>> do not need to look into the console. The test issues a shutdown from
>> the VM itself once its done with the batch operations.
>
>
> Still, if it fails, we want to see the output, right? It's very
> frustrating if it doesn't, especially in an automated pipeline.
>
>
>> >  self.parse_log()
>> > --
>> > 2.37.3
>> >
>>
>>


Re: [PATCH v9 5/8] KVM: Register/unregister the guest private memory regions

2022-11-16 Thread Sean Christopherson
On Tue, Oct 25, 2022, Chao Peng wrote:
> +static int kvm_vm_ioctl_set_mem_attr(struct kvm *kvm, gpa_t gpa, gpa_t size,
> +  bool is_private)
> +{
> + gfn_t start, end;
> + unsigned long i;
> + void *entry;
> + int idx;
> + int r = 0;
> +
> + if (size == 0 || gpa + size < gpa)
> + return -EINVAL;
> + if (gpa & (PAGE_SIZE - 1) || size & (PAGE_SIZE - 1))
> + return -EINVAL;
> +
> + start = gpa >> PAGE_SHIFT;
> + end = (gpa + size - 1 + PAGE_SIZE) >> PAGE_SHIFT;
> +
> + /*
> +  * Guest memory defaults to private, kvm->mem_attr_array only stores
> +  * shared memory.
> +  */
> + entry = is_private ? NULL : xa_mk_value(KVM_MEM_ATTR_SHARED);
> +
> + idx = srcu_read_lock(&kvm->srcu);
> + KVM_MMU_LOCK(kvm);
> + kvm_mmu_invalidate_begin(kvm, start, end);
> +
> + for (i = start; i < end; i++) {
> + r = xa_err(xa_store(&kvm->mem_attr_array, i, entry,
> + GFP_KERNEL_ACCOUNT));
> + if (r)
> + goto err;
> + }
> +
> + kvm_unmap_mem_range(kvm, start, end);
> +
> + goto ret;
> +err:
> + for (; i > start; i--)
> + xa_erase(&kvm->mem_attr_array, i);

I don't think deleting previous entries is correct.  To unwind, the correct 
thing
to do is restore the original values.  E.g. if userspace is mapping a 
large
range as shared, and some of the previous entries were shared, deleting them 
would
incorrectly "convert" those entries to private.

Tracking the previous state likely isn't the best approach, e.g. it would 
require
speculatively allocating extra memory for a rare condition that is likely going 
to
lead to OOM anyways.

Instead of trying to unwind, what about updating the ioctl() params such that
retrying with the updated addr+size would Just Work?  E.g.

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 55b07aae67cc..f1de592a1a06 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1015,15 +1015,12 @@ static int kvm_vm_ioctl_set_mem_attr(struct kvm *kvm, 
gpa_t gpa, gpa_t size,
 
kvm_unmap_mem_range(kvm, start, end, attr);
 
-   goto ret;
-err:
-   for (; i > start; i--)
-   xa_erase(>mem_attr_array, i);
-ret:
kvm_mmu_invalidate_end(kvm, start, end);
KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(>srcu, idx);
 
+   
+
return r;
 }
 #endif /* CONFIG_KVM_GENERIC_PRIVATE_MEM */
@@ -4989,6 +4986,8 @@ static long kvm_vm_ioctl(struct file *filp,
 
r = kvm_vm_ioctl_set_mem_attr(kvm, region.addr,
  region.size, set);
+   if (copy_to_user(argp, , sizeof(region)) && !r)
+   r = -EFAULT
break;
}
 #endif



Re: [PATCH v9 7/8] KVM: Handle page fault for private memory

2022-11-16 Thread Sean Christopherson
On Wed, Nov 16, 2022, Ackerley Tng wrote:
> >@@ -4173,6 +4203,22 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, 
> >struct kvm_page_fault *fault)
> > return RET_PF_EMULATE;
> > }
> >
> >+if (kvm_slot_can_be_private(slot) &&
> >+fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
> >+vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
> >+if (fault->is_private)
> >+vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
> >+else
> >+vcpu->run->memory.flags = 0;
> >+vcpu->run->memory.padding = 0;
> >+vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT;
> >+vcpu->run->memory.size = PAGE_SIZE;
> >+return RET_PF_USER;
> >+}
> >+
> >+if (fault->is_private)
> >+return kvm_faultin_pfn_private(fault);
> >+
> 
> Since each memslot may also not be backed by restricted memory, we
> should also check if the memslot has been set up for private memory
> with
> 
>   if (fault->is_private && kvm_slot_can_be_private(slot))
>   return kvm_faultin_pfn_private(fault);
> 
> Without this check, restrictedmem_get_page will get called with NULL
> in slot->restricted_file, which causes a NULL pointer dereference.

Hmm, silently skipping the faultin would result in KVM faulting in the shared
portion of the memslot, and I believe would end up mapping that pfn as private,
i.e. would map a non-UPM PFN as a private mapping.  For TDX and SNP, that would
be double ungood as it would let the host access memory that is mapped private,
i.e. lead to #MC or #PF(RMP) in the host.

I believe the correct solution is to drop the "can be private" check from the
above check, and instead handle that in kvm_faultin_pfn_private().  That would
fix another bug, e.g. if the fault is shared, the slot can't be private, but for
whatever reason userspace marked the gfn as private.  Even though KVM might be
able to service the fault, the correct thing to do in that case is to exit to
userspace.

E.g.

---
 arch/x86/kvm/mmu/mmu.c | 36 ++--
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 10017a9f26ee..e2ac8873938e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4158,11 +4158,29 @@ static inline u8 order_to_level(int order)
return PG_LEVEL_4K;
 }
 
-static int kvm_faultin_pfn_private(struct kvm_page_fault *fault)
+static int kvm_do_memory_fault_exit(struct kvm_vcpu *vcpu,
+   struct kvm_page_fault *fault)
+{
+   vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
+   if (fault->is_private)
+   vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
+   else
+   vcpu->run->memory.flags = 0;
+   vcpu->run->memory.padding = 0;
+   vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT;
+   vcpu->run->memory.size = PAGE_SIZE;
+   return RET_PF_USER;
+}
+
+static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
+  struct kvm_page_fault *fault)
 {
int order;
struct kvm_memory_slot *slot = fault->slot;
 
+   if (kvm_slot_can_be_private(slot))
+   return kvm_do_memory_fault_exit(vcpu, fault);
+
if (kvm_restricted_mem_get_pfn(slot, fault->gfn, >pfn, ))
return RET_PF_RETRY;
 
@@ -4203,21 +4221,11 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, 
struct kvm_page_fault *fault)
return RET_PF_EMULATE;
}
 
-   if (kvm_slot_can_be_private(slot) &&
-   fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
-   vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
-   if (fault->is_private)
-   vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
-   else
-   vcpu->run->memory.flags = 0;
-   vcpu->run->memory.padding = 0;
-   vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT;
-   vcpu->run->memory.size = PAGE_SIZE;
-   return RET_PF_USER;
-   }
+   if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn))
+   return kvm_do_memory_fault_exit(vcpu, fault);
 
if (fault->is_private)
-   return kvm_faultin_pfn_private(fault);
+   return kvm_faultin_pfn_private(vcpu, fault);
 
async = false;
fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, ,

base-commit: 969d761bb7b8654605937f31ae76123dcb7f15a3
-- 




[PATCH] shpc: disallow unplug when power indicator is blinking

2022-11-16 Thread Vladimir Sementsov-Ogievskiy
Pressing attention button has special meaning when power indicator is
blinking. Better just not do it.

For example, trying to remove a device immediately after hotplug leads to
both commands succeeding but the device not actually being unrealized.

Same thing for PCIE hotplug was done in
  81124b3c7a5dae "pcie: add power indicator blink check"

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 hw/pci/shpc.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c
index e71f3a7483..fca7f6691a 100644
--- a/hw/pci/shpc.c
+++ b/hw/pci/shpc.c
@@ -568,6 +568,13 @@ void shpc_device_unplug_request_cb(HotplugHandler 
*hotplug_dev,
 
 state = shpc_get_status(shpc, slot, SHPC_SLOT_STATE_MASK);
 led = shpc_get_status(shpc, slot, SHPC_SLOT_PWR_LED_MASK);
+
+if (led == SHPC_LED_BLINK) {
+error_setg(errp, "Hot-unplug failed: "
+   "guest is busy (power indicator blinking)");
+return;
+}
+
 if (state == SHPC_STATE_DISABLED && led == SHPC_LED_OFF) {
 shpc_free_devices_in_slot(shpc, slot);
 shpc_set_status(shpc, slot, 1, SHPC_SLOT_STATUS_MRL_OPEN);
-- 
2.34.1




Re: [PATCH 2/2] Do not access /dev/mem in MSI-X PCI passthrough on Xen

2022-11-16 Thread Marek Marczykowski-Górecki
On Wed, Nov 16, 2022 at 02:15:22PM -0500, Jason Andryuk wrote:
> On Mon, Nov 14, 2022 at 2:21 PM Marek Marczykowski-Górecki
>  wrote:
> >
> > The /dev/mem is used for two purposes:
> >  - reading PCI_MSIX_ENTRY_CTRL_MASKBIT
> >  - reading Pending Bit Array (PBA)
> >
> > The first one was originally done because Xen did not send all
> > vector ctrl writes to the device model, so QEMU might have an outdated
> > register value. This has been changed in Xen, so QEMU can now use its
> > cached value of the register instead.
> >
> > The Pending Bit Array (PBA) handling is for the case where it lives on
> > the same page as the MSI-X table itself. Xen has been extended to handle
> > this case too (as well as other registers that may live on those pages),
> > so QEMU handling is not necessary anymore.
> >
> > Removing /dev/mem access is useful to work within stubdomain, and
> > necessary when dom0 kernel runs in lockdown mode.
> >
> > Signed-off-by: Marek Marczykowski-Górecki 
> 
> I put the Xen, QEMU, and xen-pciback patches into OpenXT and gave a
> little test.  When pci_permissive=0, iwlwifi fails to load its
> firmware.  With pci_permissive=1, it looks like MSI-X is enabled. (I
> previously included your libxl allow_interrupt_control patch - that
> seemed to get regular MSIs working prior to the MSI-X patches.)  I
> also removed the OpenXT equivalent of 0005-Disable-MSI-X-caps.patch.
> I am testing with Linux 5.4.y, so that could be another factor.

Can you confirm the allow_interrupt_control is set by libxl? Also,
vanilla 5.4 doesn't have the allow_interrupt_control patch at all, and you
may have an earlier version that had "allow_msi_enable" as the sysfs
file name.

> One strange thing is the lspci output.  Dom0 shows MSI-X enabled.
> Meanwhile NDVM (sys-net) does not show the MSI-X capability.  If you
> `hexdump -C /sys/bus/pci/devices/$dev/config` you can see MSI-X
> enabled, but you also see that the MSI capability has 00 as the next
> pointer, so lspci stops parsing.

This 00 value is written by Linux[1] (sic!), and qemu then incorrectly
allows the write and happily emulates that zero. The other qemu patch
in this series ought to fix that (as in: properly refuse the write), do
you have it included?

[1] 
https://github.com/torvalds/linux/blob/master/drivers/net/wireless/intel/iwlwifi/pcie/drv.c#L1721

> MSI cap stubdom:
> 0040  10 00 92 00 c0 0e 00 00  10 0c 10 00 00 00 00 00  ||
> 0x41 -> next 0x00
> MSI cap dom0:
> 0040  10 80 92 00 c0 0e 00 10  10 0c 10 00 00 00 00 00  ||
> 0x41 -> next 0x80
> 
> MSI-X:
> 0080  11 00 0f 80 00 20 00 00  00 30 00 00 00 00 00 00
> 
> AFAIU, the value 0x80 at offset 0x83 is MSI-X Enabled.
> 
> I had a boot where assignment failed with the hypervisor printing:
> d12: assign (:00:14.3) failed (-16)
> Rebooting the laptop seemed to clear that.

Zombie of previous domain? Not set as "assignable" first?

-- 
Best Regards,
Marek Marczykowski-Górecki
Invisible Things Lab


signature.asc
Description: PGP signature


Re: [PATCH v9 7/8] KVM: Handle page fault for private memory

2022-11-16 Thread Ackerley Tng
> A memslot with KVM_MEM_PRIVATE being set can include both fd-based
> private memory and hva-based shared memory. Architecture code (like TDX
> code) can tell whether the on-going fault is private or not. This patch
> adds a 'is_private' field to kvm_page_fault to indicate this and
> architecture code is expected to set it.
>
> To handle page fault for such memslot, the handling logic is different
> depending on whether the fault is private or shared. KVM checks if
> 'is_private' matches the host's view of the page (maintained in
> mem_attr_array).
>   - For a successful match, private pfn is obtained with
> restrictedmem_get_page () from private fd and shared pfn is obtained
> with existing get_user_pages().
>   - For a failed match, KVM causes a KVM_EXIT_MEMORY_FAULT exit to
> userspace. Userspace then can convert memory between private/shared
> in host's view and retry the fault.
>
> Co-developed-by: Yu Zhang 
> Signed-off-by: Yu Zhang 
> Signed-off-by: Chao Peng 
> ---
>  arch/x86/kvm/mmu/mmu.c  | 56 +++--
>  arch/x86/kvm/mmu/mmu_internal.h | 14 -
>  arch/x86/kvm/mmu/mmutrace.h |  1 +
>  arch/x86/kvm/mmu/spte.h |  6 
>  arch/x86/kvm/mmu/tdp_mmu.c  |  3 +-
>  include/linux/kvm_host.h| 28 +
>  6 files changed, 103 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 67a9823a8c35..10017a9f26ee 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3030,7 +3030,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, 
> gfn_t gfn,
>
>  int kvm_mmu_max_mapping_level(struct kvm *kvm,
> const struct kvm_memory_slot *slot, gfn_t gfn,
> -   int max_level)
> +   int max_level, bool is_private)
>  {
>   struct kvm_lpage_info *linfo;
>   int host_level;
> @@ -3042,6 +3042,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
>   break;
>   }
>
> + if (is_private)
> + return max_level;
> +
>   if (max_level == PG_LEVEL_4K)
>   return PG_LEVEL_4K;
>
> @@ -3070,7 +3073,8 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, 
> struct kvm_page_fault *fault
>* level, which will be used to do precise, accurate accounting.
>*/
>   fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, slot,
> -  fault->gfn, 
> fault->max_level);
> +  fault->gfn, 
> fault->max_level,
> +  fault->is_private);
>   if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
>   return;
>
> @@ -4141,6 +4145,32 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, 
> struct kvm_async_pf *work)
>   kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
>  }
>
> +static inline u8 order_to_level(int order)
> +{
> + BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
> +
> + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
> + return PG_LEVEL_1G;
> +
> + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
> + return PG_LEVEL_2M;
> +
> + return PG_LEVEL_4K;
> +}
> +
> +static int kvm_faultin_pfn_private(struct kvm_page_fault *fault)
>  +{
>  +int order;
>  +struct kvm_memory_slot *slot = fault->slot;
>  +
>  +if (kvm_restricted_mem_get_pfn(slot, fault->gfn, >pfn, ))
>+  return RET_PF_RETRY;
>+
>+  fault->max_level = min(order_to_level(order), fault->max_level);
>+  fault->map_writable = !(slot->flags & KVM_MEM_READONLY);
>+  return RET_PF_CONTINUE;
>+}
>+
> static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault 
> *fault)
> {
>   struct kvm_memory_slot *slot = fault->slot;
>@@ -4173,6 +4203,22 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, 
>struct kvm_page_fault *fault)
>   return RET_PF_EMULATE;
>   }
>
>+  if (kvm_slot_can_be_private(slot) &&
>+  fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
>+  vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT;
>+  if (fault->is_private)
>+  vcpu->run->memory.flags = KVM_MEMORY_EXIT_FLAG_PRIVATE;
>+  else
>+  vcpu->run->memory.flags = 0;
>+  vcpu->run->memory.padding = 0;
>+  vcpu->run->memory.gpa = fault->gfn << PAGE_SHIFT;
>+  vcpu->run->memory.size = PAGE_SIZE;
>+  return RET_PF_USER;
>+  }
>+
>+  if (fault->is_private)
>+  return kvm_faultin_pfn_private(fault);
>+

Since each memslot may also not be backed by restricted memory, we
should also check if the memslot has been set up for private memory
with

if (fault->is_private && kvm_slot_can_be_private(slot))
return 

Re: [PATCH 2/2] qapi: introduce DEVICE_POWER_ON for SHPC hotplug

2022-11-16 Thread Vladimir Sementsov-Ogievskiy

On 11/16/22 19:26, Michael S. Tsirkin wrote:

On Wed, Nov 16, 2022 at 07:12:34PM +0300, Vladimir Sementsov-Ogievskiy wrote:

Hi all! That's an RFC patch.

The problem is that SHPC protocol says that power-led is blinking for 5
seconds before actual turning-on the device. If we call device-del
during this time the attention button press is ignored and we never get
DEVICE_DELETED event, which is unexpected for the user.

I suggest adding a pair for DEVICE_DELETED: DEVICE_POWER_ON. So the user
should wait for DEVICE_POWER_ON after device-add before making any
other operations with the device (including device-del).

What I'm unsure is what about other types of hotplug - PCIE and
ACPI.. Do they suffer from similar problems?

I didn't yet look at this patchset deeply (we are in freeze anyway)
but PCIE is substantially the same as SHPC.

Take a look at Gerd's "improve native hotplug for pcie root ports"
same kind of approach probably works for SHPC.


Looking at it. Yes, I think this approach is OK, thanks for the link.

I doubt now that we really need a new event. Instead I can update SHPC to return an error
when trying to unplug with blinking power indicator (like 81124b3c7a5dae "pcie: add 
power indicator blink check").

--
Best regards,
Vladimir




Re: [PATCH 1/2] remove DEC 21154 PCI bridge

2022-11-16 Thread BALATON Zoltan




On Wed, 16 Nov 2022, Igor Mammedov wrote:


Code has not been used practically since its inception (2004)
 f2aa58c6f4a20 UniNorth PCI bridge support
or maybe even earlier, but it was consuming contributors time
as QEMU was being rewritten.
Drop it for now. Whomever would like to actually
use the thing, can make sure it actually works/reintroduce
it back when there is a user.

PS:
I've stumbled upon this when replacing PCIDeviceClass::is_bridge
field with QOM cast to PCI_BRIDGE type. Unused DEC 21154
was the only one trying to use the field with plain PCIDevice.
It's not worth keeping the field around for the sake of the code
that was commented out 'forever'.

Signed-off-by: Igor Mammedov 
---
hw/pci-bridge/dec.h   |   9 ---
include/hw/pci/pci_ids.h  |   1 -
hw/pci-bridge/dec.c   | 164 --
hw/pci-bridge/meson.build |   2 -
hw/pci-host/uninorth.c|   6 --
5 files changed, 182 deletions(-)
delete mode 100644 hw/pci-bridge/dec.h
delete mode 100644 hw/pci-bridge/dec.c

diff --git a/hw/pci-bridge/dec.h b/hw/pci-bridge/dec.h
deleted file mode 100644
index 869e90b136..00
--- a/hw/pci-bridge/dec.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef HW_PCI_BRIDGE_DEC_H
-#define HW_PCI_BRIDGE_DEC_H
-
-
-#define TYPE_DEC_21154 "dec-21154-sysbus"
-
-PCIBus *pci_dec_21154_init(PCIBus *parent_bus, int devfn);
-
-#endif
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index bc9f834fd1..e4386ebb20 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -169,7 +169,6 @@

#define PCI_VENDOR_ID_DEC0x1011
#define PCI_DEVICE_ID_DEC_21143  0x0019
-#define PCI_DEVICE_ID_DEC_21154  0x0026

#define PCI_VENDOR_ID_CIRRUS 0x1013

diff --git a/hw/pci-bridge/dec.c b/hw/pci-bridge/dec.c
deleted file mode 100644
index 4773d07e6d..00
--- a/hw/pci-bridge/dec.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * QEMU DEC 21154 PCI bridge
- *
- * Copyright (c) 2006-2007 Fabrice Bellard
- * Copyright (c) 2007 Jocelyn Mayer
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to 
deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "dec.h"
-#include "hw/sysbus.h"
-#include "qapi/error.h"
-#include "qemu/module.h"
-#include "hw/pci/pci.h"
-#include "hw/pci/pci_host.h"
-#include "hw/pci/pci_bridge.h"
-#include "hw/pci/pci_bus.h"
-#include "qom/object.h"
-
-OBJECT_DECLARE_SIMPLE_TYPE(DECState, DEC_21154)
-
-struct DECState {
-PCIHostState parent_obj;
-};
-
-static int dec_map_irq(PCIDevice *pci_dev, int irq_num)
-{
-return irq_num;
-}
-
-static void dec_pci_bridge_realize(PCIDevice *pci_dev, Error **errp)
-{
-pci_bridge_initfn(pci_dev, TYPE_PCI_BUS);
-}
-
-static void dec_21154_pci_bridge_class_init(ObjectClass *klass, void *data)
-{
-DeviceClass *dc = DEVICE_CLASS(klass);
-PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
-
-set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
-k->realize = dec_pci_bridge_realize;
-k->exit = pci_bridge_exitfn;
-k->vendor_id = PCI_VENDOR_ID_DEC;
-k->device_id = PCI_DEVICE_ID_DEC_21154;
-k->config_write = pci_bridge_write_config;
-k->is_bridge = true;
-dc->desc = "DEC 21154 PCI-PCI bridge";
-dc->reset = pci_bridge_reset;
-dc->vmsd = _pci_device;
-}
-
-static const TypeInfo dec_21154_pci_bridge_info = {
-.name  = "dec-21154-p2p-bridge",
-.parent= TYPE_PCI_BRIDGE,
-.instance_size = sizeof(PCIBridge),
-.class_init= dec_21154_pci_bridge_class_init,
-.interfaces = (InterfaceInfo[]) {
-{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
-{ },
-},
-};
-
-PCIBus *pci_dec_21154_init(PCIBus *parent_bus, int devfn)
-{
-PCIDevice *dev;
-PCIBridge *br;
-
-dev = pci_new_multifunction(devfn, false, "dec-21154-p2p-bridge");
-br = PCI_BRIDGE(dev);
-pci_bridge_map_irq(br, "DEC 21154 PCI-PCI bridge", dec_map_irq);
-pci_realize_and_unref(dev, parent_bus, _fatal);
-return 

Re: Failure analysis (was Re: [PULL for 7.2 00/10] testing and doc updates)

2022-11-16 Thread Mark Cave-Ayland

On 16/11/2022 18:20, Alex Bennée wrote:


Stefan Hajnoczi  writes:


This pull request causes the following CI failure:

https://gitlab.com/qemu-project/qemu/-/jobs/3328449477

I haven't figured out the root cause of the failure. Maybe the pull
request just exposes a latent failure. Please take a look and we can
try again for -rc2.


OK after a lot of digging I've come to the following conclusion:

   * the Fuloong 2E machine never enables the FIFO on the 16550 (s->fcr & 
UART_FCR_FE)
   * as a result if qemu_chr_fe_write(&s->chr, &s->tsr, 1) fails with -EAGAIN
 - a serial_watch_cb is queued
 - s->tsr_retry++
   * additional serial_ioport_write's overwrite s->thr
   * the console output gets corrupted

You can see the effect by comparing the serial write and xmit values:

   ➜  grep serial_write alex.log | cut -d ' ' -f 6 | xxd -r -p | head -n 10
   [0.00] Initializing cgroup subsys cpuset
   [0.00] Initializing cgroup subsys cpu
   [0.00] Initializing cgroup subsys cpuacct
   [0.00] Linux version 3.16.0-6-loongson-2e 
(debian-ker...@lists.debian.org) (gcc version 4.8.4 (Debian 4.8.4-1) ) #1 
Debian 3.16.56-1+deb8u1 (2018-05-08)
   [0.00] memsize=256, highmemsize=0
   [0.00] CpuClock = 53308
   [0.00] bootconsole [early0] enabled
   [0.00] CPU0 revision is: 6302 (ICT Loongson-2)
   [0.00] FPU revision is: 0501
   [0.00] Checking for the multiply/shift bug... no.
   18:27:17 alex@zen:qemu.git/builds/all  on  pr/141122-misc-for-7.2-1 [$!?⇕]
   ➜  grep serial_xmit alex.log | cut -d ' ' -f 2 | xxd -r -p | head -n 10
   [0.00] Initializing cgroup subsys cpuset
   [0.00] Initializing cgroup subsys cpu
   [0.00] Initializing cgroup subsys cpuacct
   [0.00] Linux version 3.16.0-6-loongson-2e 
(debian-ker...@lists.debian.org) (gcc version 4.8.4 (Debian 4.8.4-1) ) #1 
Debian 33 0.00] bootconsole [early0] enabled
   [0.00] CPU0 revision is: 6302 (ICT Loongson-2)
   [0.00] FPU revision is: 0501
   [0.00] Checking for the multiply/shift bug... no.
   [0.00] Checking for the daddiu bug... no.
   [0.00] Determined physical RAM map:
   [0.00]  memory: 000

As a result the check for the pattern fails:

 console_pattern = 'Kernel command line: %s' % kernel_command_line
 self.wait_for_console_pattern(console_pattern)

resulting in a timeout and test fail.

In effect the configuration makes the output dependent on how fast the
avocado test can drain the socket as there is no buffering elsewhere in
the system. The changes in:

   Subject: [PULL 02/10] tests/avocado: improve behaviour waiting for login 
prompts

makes this failure more likely to happen - I think because the .peek() and
.readline() behaviour have different buffering strategies. Options
include:

   - enable the 16550 FIFO for the Loongson kernel (command line option?)
   - increase the buffering of the python socket.socket() code

I can get it to pass by shuffling the time.sleep() and a few other
checks around but that seems flaky at best.


Nice work! This is the well-known problem whereby the kernel sometimes expects the 
BIOS to have pre-configured the serial ports, which of course never happens when 
booting directly with -kernel.


Given that the fuloong2e machine already has a mini "trampoline" bootloader, would it 
be possible to tweak write_bootloader() at 
https://gitlab.com/qemu-project/qemu/-/blob/master/hw/mips/fuloong2e.c#L166 to set 
UART_FCR_FE on the available UARTs before jumping into the kernel?



ATB,

Mark.



Re: [PATCH 2/2] Do not access /dev/mem in MSI-X PCI passthrough on Xen

2022-11-16 Thread Jason Andryuk
On Mon, Nov 14, 2022 at 2:21 PM Marek Marczykowski-Górecki
 wrote:
>
> The /dev/mem is used for two purposes:
>  - reading PCI_MSIX_ENTRY_CTRL_MASKBIT
>  - reading Pending Bit Array (PBA)
>
> The first one was originally done because Xen did not send all
> vector ctrl writes to the device model, so QEMU might have an outdated
> register value. This has been changed in Xen, so QEMU can now use its
> cached value of the register instead.
>
> The Pending Bit Array (PBA) handling is for the case where it lives on
> the same page as the MSI-X table itself. Xen has been extended to handle
> this case too (as well as other registers that may live on those pages),
> so QEMU handling is not necessary anymore.
>
> Removing /dev/mem access is useful to work within stubdomain, and
> necessary when dom0 kernel runs in lockdown mode.
>
> Signed-off-by: Marek Marczykowski-Górecki 

I put the Xen, QEMU, and xen-pciback patches into OpenXT and gave a
little test.  When pci_permissive=0, iwlwifi fails to load its
firmware.  With pci_permissive=1, it looks like MSI-X is enabled. (I
previously included your libxl allow_interrupt_control patch - that
seemed to get regular MSIs working prior to the MSI-X patches.)  I
also removed the OpenXT equivalent of 0005-Disable-MSI-X-caps.patch.
I am testing with Linux 5.4.y, so that could be another factor.

One strange thing is the lspci output.  Dom0 shows MSI-X enabled.
Meanwhile NDVM (sys-net) does not show the MSI-X capability.  If you
`hexdump -C /sys/bus/pci/devices/$dev/config` you can see MSI-X
enabled, but you also see that the MSI capability has 00 as the next
pointer, so lspci stops parsing.

MSI cap stubdom:
0040  10 00 92 00 c0 0e 00 00  10 0c 10 00 00 00 00 00  ||
0x41 -> next 0x00
MSI cap dom0:
0040  10 80 92 00 c0 0e 00 10  10 0c 10 00 00 00 00 00  ||
0x41 -> next 0x80

MSI-X:
0080  11 00 0f 80 00 20 00 00  00 30 00 00 00 00 00 00

AFAIU, the value 0x80 at offset 0x83 is MSI-X Enabled.

I had a boot where assignment failed with the hypervisor printing:
d12: assign (:00:14.3) failed (-16)
Rebooting the laptop seemed to clear that.

Regards,
Jason



Re: [PATCH v9 3/8] KVM: Add KVM_EXIT_MEMORY_FAULT exit

2022-11-16 Thread Alex Bennée


Chao Peng  writes:

> On Tue, Nov 15, 2022 at 04:56:12PM +, Alex Bennée wrote:
>> 
>> Chao Peng  writes:
>> 
>> > This new KVM exit allows userspace to handle memory-related errors. It
>> > indicates an error happens in KVM at guest memory range [gpa, gpa+size).
>> > The flags includes additional information for userspace to handle the
>> > error. Currently bit 0 is defined as 'private memory' where '1'
>> > indicates error happens due to private memory access and '0' indicates
>> > error happens due to shared memory access.
>> >
>> > When private memory is enabled, this new exit will be used for KVM to
>> > exit to userspace for shared <-> private memory conversion in memory
>> > encryption usage. In such usage, typically there are two kinds of memory
>> > conversions:
>> >   - explicit conversion: happens when guest explicitly calls into KVM
>> > to map a range (as private or shared), KVM then exits to userspace
>> > to perform the map/unmap operations.
>> >   - implicit conversion: happens in KVM page fault handler where KVM
>> > exits to userspace for an implicit conversion when the page is in a
>> > different state than requested (private or shared).
>> >
>> > Suggested-by: Sean Christopherson 
>> > Co-developed-by: Yu Zhang 
>> > Signed-off-by: Yu Zhang 
>> > Signed-off-by: Chao Peng 
>> > ---
>> >  Documentation/virt/kvm/api.rst | 23 +++
>> >  include/uapi/linux/kvm.h   |  9 +
>> >  2 files changed, 32 insertions(+)
>> >
>> > diff --git a/Documentation/virt/kvm/api.rst 
>> > b/Documentation/virt/kvm/api.rst
>> > index f3fa75649a78..975688912b8c 100644
>> > --- a/Documentation/virt/kvm/api.rst
>> > +++ b/Documentation/virt/kvm/api.rst
>> > @@ -6537,6 +6537,29 @@ array field represents return values. The userspace 
>> > should update the return
>> >  values of SBI call before resuming the VCPU. For more details on RISC-V 
>> > SBI
>> >  spec refer, https://github.com/riscv/riscv-sbi-doc.
>> >  
>> > +::
>> > +
>> > +  /* KVM_EXIT_MEMORY_FAULT */
>> > +  struct {
>> > +  #define KVM_MEMORY_EXIT_FLAG_PRIVATE(1 << 0)
>> > +  __u32 flags;
>> > +  __u32 padding;
>> > +  __u64 gpa;
>> > +  __u64 size;
>> > +  } memory;
>> > +
>> > +If exit reason is KVM_EXIT_MEMORY_FAULT then it indicates that the VCPU 
>> > has
>> > +encountered a memory error which is not handled by KVM kernel module and
>> > +userspace may choose to handle it. The 'flags' field indicates the memory
>> > +properties of the exit.
>> > +
>> > + - KVM_MEMORY_EXIT_FLAG_PRIVATE - indicates the memory error is caused by
>> > +   private memory access when the bit is set. Otherwise the memory error 
>> > is
>> > +   caused by shared memory access when the bit is clear.
>> 
>> What does a shared memory access failure entail?
>
> In the context of confidential computing usages, guest can issue a
> shared memory access while the memory is actually private from the host
> point of view. This exit with bit 0 cleared gives userspace a chance to
> convert the private memory to shared memory on host.

I think this should be explicit rather than implied by the absence of
another flag. Sean suggested you might want flags for RWX failures so
maybe something like:

KVM_MEMORY_EXIT_SHARED_FLAG_READ(1 << 0)
KVM_MEMORY_EXIT_SHARED_FLAG_WRITE   (1 << 1)
KVM_MEMORY_EXIT_SHARED_FLAG_EXECUTE (1 << 2)
KVM_MEMORY_EXIT_FLAG_PRIVATE(1 << 3)

which would allow you to signal the various failure modes of the shared
region, or that you had accessed private memory.

>
>> 
>> If you envision any other failure modes it might be worth making it
>> explicit with additional flags.
>
> Sean mentioned some more usages[1][2] other than the memory conversion
> for confidential usage. But I would leave those flags being added in the
> future after those usages being well discussed.
>
> [1] https://lkml.kernel.org/r/20200617230052.gb27...@linux.intel.com
> [2] https://lore.kernel.org/all/ykxjlcg%2fwompe...@google.com
>
>> I also wonder if a bitmask makes sense if
>> there can only be one reason for a failure? Maybe all that is needed is
>> a reason enum?
>
> Though we only have one reason right now but we still want to leave room
> for future extension. Enum can express a single value at once well but
> bitmask makes it possible to express multiple orthogonal flags.

I agree if multiple orthogonal failures can occur at once a bitmask is
the right choice.

>
> Chao
>> 
>> > +
>> > +'gpa' and 'size' indicate the memory range the error occurs at. The 
>> > userspace
>> > +may handle the error and return to KVM to retry the previous memory 
>> > access.
>> > +
>> >  ::
>> >  
>> >  /* KVM_EXIT_NOTIFY */
>> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> > index f1ae45c10c94..fa60b032a405 100644
>> > --- a/include/uapi/linux/kvm.h
>> > +++ 

[RFC PATCH 3/3] hw/isa/vt82c686: Implement PIRQ routing

2022-11-16 Thread Bernhard Beschow
Both VIA south bridges allow system software to configure the routing of
PCI interrupts to ISA interrupts. Implement this to model the real
hardware more closely.

The implementation is based on hw/isa/piix4.c.

Signed-off-by: Bernhard Beschow 
---
 hw/isa/vt82c686.c | 46 ++
 1 file changed, 46 insertions(+)

diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c
index 3f9bd0c04d..21157c669b 100644
--- a/hw/isa/vt82c686.c
+++ b/hw/isa/vt82c686.c
@@ -16,6 +16,7 @@
 #include "qemu/osdep.h"
 #include "hw/isa/vt82c686.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bus.h"
 #include "hw/qdev-properties.h"
 #include "hw/ide/pci.h"
 #include "hw/isa/isa.h"
@@ -604,6 +605,48 @@ static void via_isa_request_i8259_irq(void *opaque, int 
irq, int level)
 qemu_set_irq(s->cpu_intr, level);
 }
 
+static int via_isa_get_pic_irq(const ViaISAState *s, int irq_num)
+{
+switch (irq_num) {
+case 0:
+return s->dev.config[0x55] >> 4;
+
+case 1:
+return s->dev.config[0x56] & 0xf;
+
+case 2:
+return s->dev.config[0x56] >> 4;
+
+case 3:
+return s->dev.config[0x57] >> 4;
+}
+
+return 0;
+}
+
+static void via_isa_set_pic_irq(void *opaque, int irq_num, int level)
+{
+ViaISAState *s = opaque;
+PCIBus *bus = pci_get_bus(>dev);
+int pic_irq;
+
+/* now we change the pic irq level according to the via irq mappings */
+/* XXX: optimize */
+pic_irq = via_isa_get_pic_irq(s, irq_num);
+if (pic_irq < ISA_NUM_IRQS) {
+int i, pic_level;
+
+/* The pic level is the logical OR of all the PCI irqs mapped to it. */
+pic_level = 0;
+for (i = 0; i < PCI_NUM_PINS; i++) {
+if (pic_irq == via_isa_get_pic_irq(s, i)) {
+pic_level |= pci_bus_get_irq_level(bus, i);
+}
+}
+qemu_set_irq(s->isa_irqs[pic_irq], pic_level);
+}
+}
+
 static void via_isa_realize(PCIDevice *d, Error **errp)
 {
 ViaISAState *s = VIA_ISA(d);
@@ -676,6 +719,9 @@ static void via_isa_realize(PCIDevice *d, Error **errp)
 if (!qdev_realize(DEVICE(&s->mc97), BUS(pci_bus), errp)) {
 return;
 }
+
+pci_bus_irqs(pci_bus, via_isa_set_pic_irq, pci_bus->map_irq,
+ s, ISA_NUM_IRQS);
 }
 
 /* TYPE_VT82C686B_ISA */
-- 
2.38.1




[RFC PATCH 2/3] hw/isa/piix4: Decouple INTx-to-LNKx routing which is board-specific

2022-11-16 Thread Bernhard Beschow
pci_map_irq_fn's in general seem to be board-specific, and PIIX4's
pci_slot_get_pirq() in particular seems very Malta-specific. So move the
latter to malta.c to 1/ keep the board logic in one place and 2/ avoid
PIIX4 to make assumptions about its board.

Signed-off-by: Bernhard Beschow 
---
 hw/isa/piix4.c  | 28 ++--
 hw/mips/malta.c | 29 +
 2 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c
index 8fc1db6dc9..709dd901c2 100644
--- a/hw/isa/piix4.c
+++ b/hw/isa/piix4.c
@@ -28,6 +28,7 @@
 #include "hw/irq.h"
 #include "hw/southbridge/piix.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bus.h"
 #include "hw/ide/piix.h"
 #include "hw/isa/isa.h"
 #include "hw/intc/i8259.h"
@@ -79,31 +80,6 @@ static void piix4_set_irq(void *opaque, int irq_num, int 
level)
 }
 }
 
-static int pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
-{
-int slot;
-
-slot = PCI_SLOT(pci_dev->devfn);
-
-switch (slot) {
-/* PIIX4 USB */
-case 10:
-return 3;
-/* AMD 79C973 Ethernet */
-case 11:
-return 1;
-/* Crystal 4281 Sound */
-case 12:
-return 2;
-/* PCI slot 1 to 4 */
-case 18 ... 21:
-return ((slot - 18) + irq_num) & 0x03;
-/* Unknown device, don't do any translation */
-default:
-return irq_num;
-}
-}
-
 static void piix4_isa_reset(DeviceState *dev)
 {
 PIIX4State *d = PIIX4_PCI_DEVICE(dev);
@@ -271,7 +247,7 @@ static void piix4_realize(PCIDevice *dev, Error **errp)
 }
 qdev_connect_gpio_out(DEVICE(&s->pm), 0, s->isa[9]);
 
-pci_bus_irqs(pci_bus, piix4_set_irq, pci_slot_get_pirq, s, PIIX_NUM_PIRQS);
+pci_bus_irqs(pci_bus, piix4_set_irq, pci_bus->map_irq, s, PIIX_NUM_PIRQS);
 }
 
 static void piix4_init(Object *obj)
diff --git a/hw/mips/malta.c b/hw/mips/malta.c
index c0a2e0ab04..8a6b66e759 100644
--- a/hw/mips/malta.c
+++ b/hw/mips/malta.c
@@ -39,6 +39,7 @@
 #include "hw/mips/bootloader.h"
 #include "hw/mips/cpudevs.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bus.h"
 #include "qemu/log.h"
 #include "hw/mips/bios.h"
 #include "hw/ide/pci.h"
@@ -1140,6 +1141,31 @@ static void malta_mips_config(MIPSCPU *cpu)
 }
 }
 
+static int pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
+{
+int slot;
+
+slot = PCI_SLOT(pci_dev->devfn);
+
+switch (slot) {
+/* PIIX4 USB */
+case 10:
+return 3;
+/* AMD 79C973 Ethernet */
+case 11:
+return 1;
+/* Crystal 4281 Sound */
+case 12:
+return 2;
+/* PCI slot 1 to 4 */
+case 18 ... 21:
+return ((slot - 18) + irq_num) & 0x03;
+/* Unknown device, don't do any translation */
+default:
+return irq_num;
+}
+}
+
 static void main_cpu_reset(void *opaque)
 {
 MIPSCPU *cpu = opaque;
@@ -1411,6 +1437,9 @@ void mips_malta_init(MachineState *machine)
 /* Interrupt controller */
 qdev_connect_gpio_out_named(DEVICE(piix4), "intr", 0, i8259_irq);
 
+pci_bus_irqs(pci_bus, pci_bus->set_irq, pci_slot_get_pirq,
+ piix4, pci_bus->nirq);
+
 /* generate SPD EEPROM data */
 dev = DEVICE(object_resolve_path_component(OBJECT(piix4), "pm"));
 smbus = I2C_BUS(qdev_get_child_bus(dev, "i2c"));
-- 
2.38.1




[RFC PATCH 1/3] hw/isa/piix3: Decouple INTx-to-LNKx routing which is board-specific

2022-11-16 Thread Bernhard Beschow
pci_map_irq_fn's in general seem to be board-specific. So move PIIX3's
pci_slot_get_pirq() to board code to not have PIIX3 make assumptions
about its board.

Signed-off-by: Bernhard Beschow 
---
 hw/i386/pc_piix.c | 17 +
 hw/isa/piix3.c| 16 +++-
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 0ad0ed1603..07aa38081a 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -47,6 +47,7 @@
 #include "hw/sysbus.h"
 #include "hw/i2c/smbus_eeprom.h"
 #include "hw/xen/xen-x86.h"
+#include "hw/xen/xen.h"
 #include "exec/memory.h"
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/piix4.h"
@@ -73,6 +74,17 @@ static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 };
 static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
 #endif
 
+/*
+ * Return the global irq number corresponding to a given device irq
+ * pin. We could also use the bus number to have a more precise mapping.
+ */
+static int pci_slot_get_pirq(PCIDevice *pci_dev, int pci_intx)
+{
+int slot_addend;
+slot_addend = PCI_SLOT(pci_dev->devfn) - 1;
+return (pci_intx + slot_addend) & 3;
+}
+
 /* PC hardware initialisation */
 static void pc_init1(MachineState *machine,
  const char *host_type, const char *pci_type)
@@ -223,6 +235,11 @@ static void pc_init1(MachineState *machine,
 piix3->pic = x86ms->gsi;
 piix3_devfn = piix3->dev.devfn;
 isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(piix3), "isa.0"));
+
+pci_bus_irqs(pci_bus, pci_bus->set_irq,
+ xen_enabled() ? xen_pci_slot_get_pirq
+   : pci_slot_get_pirq,
+ pci_dev, pci_bus->nirq);
 } else {
 pci_bus = NULL;
 isa_bus = isa_bus_new(NULL, get_system_memory(), system_io,
diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c
index f9b4af5c05..83a6e3be72 100644
--- a/hw/isa/piix3.c
+++ b/hw/isa/piix3.c
@@ -29,6 +29,7 @@
 #include "hw/southbridge/piix.h"
 #include "hw/irq.h"
 #include "hw/isa/isa.h"
+#include "hw/pci/pci_bus.h"
 #include "hw/xen/xen.h"
 #include "sysemu/runstate.h"
 #include "migration/vmstate.h"
@@ -79,17 +80,6 @@ static void piix3_set_irq(void *opaque, int pirq, int level)
 piix3_set_irq_level(piix3, pirq, level);
 }
 
-/*
- * Return the global irq number corresponding to a given device irq
- * pin. We could also use the bus number to have a more precise mapping.
- */
-static int pci_slot_get_pirq(PCIDevice *pci_dev, int pci_intx)
-{
-int slot_addend;
-slot_addend = PCI_SLOT(pci_dev->devfn) - 1;
-return (pci_intx + slot_addend) & 3;
-}
-
 static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pin)
 {
 PIIX3State *piix3 = opaque;
@@ -388,7 +378,7 @@ static void piix3_realize(PCIDevice *dev, Error **errp)
 return;
 }
 
-pci_bus_irqs(pci_bus, piix3_set_irq, pci_slot_get_pirq,
+pci_bus_irqs(pci_bus, piix3_set_irq, pci_bus->map_irq,
  piix3, PIIX_NUM_PIRQS);
 pci_bus_set_route_irq_fn(pci_bus, piix3_route_intx_pin_to_irq);
 }
@@ -424,7 +414,7 @@ static void piix3_xen_realize(PCIDevice *dev, Error **errp)
  * connected to the IOAPIC directly.
  * These additional routes can be discovered through ACPI.
  */
-pci_bus_irqs(pci_bus, xen_piix3_set_irq, xen_pci_slot_get_pirq,
+pci_bus_irqs(pci_bus, xen_piix3_set_irq, pci_bus->map_irq,
  piix3, XEN_PIIX_NUM_PIRQS);
 }
 
-- 
2.38.1




[RFC PATCH 0/3] Decouple INTx-to-LNKx routing from south bridges

2022-11-16 Thread Bernhard Beschow
During my PIIX consolidation work [1] I've noticed that both PIIX models have
quite different pci_slot_get_pirq() implementations. These functions seem to
map PCI INTx pins to input pins of a programmable interrupt router which is
AFAIU board-specific. IOW, board-specific assumptions are baked into the device
models which prevent e.g. the whole PIIX4 south bridge to be reusable in the PC
machine.

In this series, I've moved the pci_slot_get_pirq() implementations into their
respective boards. This required a hack, however, thus this RFC. The issue is
that pci_slot_get_pirq() can only be assigned using pci_bus_irqs() which also
wants a pci_set_irq_fn. That function is in turn device-specific.

Furthermore, the issue does not only affect PIIX but also the VIA south bridges
as demonstrated in the last patch. Any advice for an upstreamable solution would
be highly appreciated.

Testing done:
* `make check`
* `make check-avocado`
* `qemu-system-ppc -machine pegasos2 -rtc base=localtime -device 
ati-vga,guest_hwcursor=true,romfile="" -cdrom morphos-3.17.iso -kernel 
morphos-3.17/boot.img -serial stdio`
* `qemu-system-mips64el -M malta -kernel vmlinux-3.2.0-4-5kc-malta -hda 
debian_wheezy_mipsel_standard.qcow2 -append "root=/dev/sda1 console=ttyS0"`
* `qemu-system-x86_64 -M pc -m 2G -cdrom manjaro-kde-21.3.2-220704-linux515.iso`

Thanks,
Bernhard

[1] https://lists.nongnu.org/archive/html/qemu-devel/2022-10/msg03941.html

Bernhard Beschow (3):
  hw/isa/piix3: Decouple INTx-to-LNKx routing which is board-specific
  hw/isa/piix4: Decouple INTx-to-LNKx routing which is board-specific
  hw/isa/vt82c686: Implement PIRQ routing

 hw/i386/pc_piix.c | 17 +
 hw/isa/piix3.c| 16 +++-
 hw/isa/piix4.c| 28 ++--
 hw/isa/vt82c686.c | 46 ++
 hw/mips/malta.c   | 29 +
 5 files changed, 97 insertions(+), 39 deletions(-)

-- 
2.38.1




Re: [PATCH v9 3/8] KVM: Add KVM_EXIT_MEMORY_FAULT exit

2022-11-16 Thread Sean Christopherson
On Wed, Nov 16, 2022, Andy Lutomirski wrote:
> 
> 
> On Tue, Oct 25, 2022, at 8:13 AM, Chao Peng wrote:
> > diff --git a/Documentation/virt/kvm/api.rst 
> > b/Documentation/virt/kvm/api.rst
> > index f3fa75649a78..975688912b8c 100644
> > --- a/Documentation/virt/kvm/api.rst
> > +++ b/Documentation/virt/kvm/api.rst
> > @@ -6537,6 +6537,29 @@ array field represents return values. The 
> > userspace should update the return
> >  values of SBI call before resuming the VCPU. For more details on 
> > RISC-V SBI
> >  spec refer, https://github.com/riscv/riscv-sbi-doc.
> > 
> > +::
> > +
> > +   /* KVM_EXIT_MEMORY_FAULT */
> > +   struct {
> > +  #define KVM_MEMORY_EXIT_FLAG_PRIVATE (1 << 0)
> > +   __u32 flags;
> > +   __u32 padding;
> > +   __u64 gpa;
> > +   __u64 size;
> > +   } memory;
> > +
> 
> Would it make sense to also have a field for the access type (read, write,
> execute, etc)?  I realize that shared <-> private conversion doesn't strictly
> need this, but it seems like it could be useful for logging failures and also
> for avoiding a second immediate fault if the type gets converted but doesn't
> have the right protection yet.

I don't think a separate field is necessary, that info can be conveyed via 
flags.
Though maybe we should go straight to a u64 for flags.  Hmm, and maybe avoid 
bits
0-3 so that if/when RWX info is conveyed the flags can align with
PROT_{READ,WRITE,EXEC} and the EPT flags, e.g.

KVM_MEMORY_EXIT_FLAG_READ   (1 << 0)
KVM_MEMORY_EXIT_FLAG_WRITE  (1 << 1)
KVM_MEMORY_EXIT_FLAG_EXECUTE(1 << 2)

> (Obviously, if this were changed, KVM would need the ability to report that
> it doesn't actually know the mode.)
> 
> --Andy



Re: [PATCH v3 14/17] vfio/migration: Reset device if setting recover state fails

2022-11-16 Thread Alex Williamson
On Thu, 3 Nov 2022 18:16:17 +0200
Avihai Horon  wrote:

> If vfio_migration_set_state() fails to set the device in the requested
> state it tries to put it in a recover state. If setting the device in
> the recover state fails as well, hw_error is triggered and the VM is
> aborted.
> 
> To improve user experience and avoid VM data loss, reset the device with
> VFIO_RESET_DEVICE instead of aborting the VM.
> 
> Signed-off-by: Avihai Horon 
> ---
>  hw/vfio/migration.c | 14 --
>  1 file changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index f8c3228314..e8068b9147 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -92,8 +92,18 @@ static int vfio_migration_set_state(VFIODevice *vbasedev,
>  
>  mig_state->device_state = recover_state;
>  if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
> -hw_error("%s: Failed setting device in recover state, err: %s",
> - vbasedev->name, strerror(errno));
> +error_report(
> +"%s: Failed setting device in recover state, err: %s. 
> Resetting device",
> + vbasedev->name, strerror(errno));
> +
> +if (ioctl(vbasedev->fd, VFIO_DEVICE_RESET)) {
> +hw_error("%s: Failed resetting device, err: %s", 
> vbasedev->name,
> + strerror(errno));
> +}
> +
> +migration->device_state = VFIO_DEVICE_STATE_RUNNING;
> +
> +return -1;
>  }
>  
>  migration->device_state = recover_state;

This addresses one of my comments on 12/ and should probably be rolled
in there.  Thanks,

Alex




Failure analysis (was Re: [PULL for 7.2 00/10] testing and doc updates)

2022-11-16 Thread Alex Bennée


Stefan Hajnoczi  writes:

> This pull request causes the following CI failure:
>
> https://gitlab.com/qemu-project/qemu/-/jobs/3328449477
>
> I haven't figured out the root cause of the failure. Maybe the pull
> request just exposes a latent failure. Please take a look and we can
> try again for -rc2.

OK after a lot of digging I've come to the following conclusion:

  * the Fuloong 2E machine never enables the FIFO on the 16550 (s->fcr & 
UART_FCR_FE)
  * as a result if qemu_chr_fe_write(&s->chr, &s->tsr, 1) fails with -EAGAIN
- a serial_watch_cb is queued
- s->tsr_retry++
  * additional serial_ioport_write's overwrite s->thr
  * the console output gets corrupted

You can see the effect by comparing the serial write and xmit values:

  ➜  grep serial_write alex.log | cut -d ' ' -f 6 | xxd -r -p | head -n 10
  [0.00] Initializing cgroup subsys cpuset
  [0.00] Initializing cgroup subsys cpu
  [0.00] Initializing cgroup subsys cpuacct
  [0.00] Linux version 3.16.0-6-loongson-2e 
(debian-ker...@lists.debian.org) (gcc version 4.8.4 (Debian 4.8.4-1) ) #1 
Debian 3.16.56-1+deb8u1 (2018-05-08)
  [0.00] memsize=256, highmemsize=0
  [0.00] CpuClock = 53308
  [0.00] bootconsole [early0] enabled
  [0.00] CPU0 revision is: 6302 (ICT Loongson-2)
  [0.00] FPU revision is: 0501
  [0.00] Checking for the multiply/shift bug... no.
  18:27:17 alex@zen:qemu.git/builds/all  on  pr/141122-misc-for-7.2-1 [$!?⇕] 
  ➜  grep serial_xmit alex.log | cut -d ' ' -f 2 | xxd -r -p | head -n 10
  [0.00] Initializing cgroup subsys cpuset
  [0.00] Initializing cgroup subsys cpu
  [0.00] Initializing cgroup subsys cpuacct
  [0.00] Linux version 3.16.0-6-loongson-2e 
(debian-ker...@lists.debian.org) (gcc version 4.8.4 (Debian 4.8.4-1) ) #1 
Debian 33 0.00] bootconsole [early0] enabled
  [0.00] CPU0 revision is: 6302 (ICT Loongson-2)
  [0.00] FPU revision is: 0501
  [0.00] Checking for the multiply/shift bug... no.
  [0.00] Checking for the daddiu bug... no.
  [0.00] Determined physical RAM map:
  [0.00]  memory: 000

As a result the check for the pattern fails:

console_pattern = 'Kernel command line: %s' % kernel_command_line
self.wait_for_console_pattern(console_pattern)

resulting in a timeout and test fail.

In effect the configuration makes the output dependent on how fast the
avocado test can drain the socket as there is no buffering elsewhere in
the system. The changes in:

  Subject: [PULL 02/10] tests/avocado: improve behaviour waiting for login 
prompts

makes this failure more likely to happen - I think because the .peek() and
.readline() behaviour have different buffering strategies. Options
include:

  - enable the 16550 FIFO for the Loongson kernel (command line option?)
  - increase the buffering of the python socket.socket() code

I can get it to pass by shuffling the time.sleep() and a few other
checks around but that seems flaky at best.

-- 
Alex Bennée



Re: [PATCH v3 12/17] vfio/migration: Implement VFIO migration protocol v2

2022-11-16 Thread Alex Williamson
On Thu, 3 Nov 2022 18:16:15 +0200
Avihai Horon  wrote:

> Add implementation of VFIO migration protocol v2. The two protocols, v1
> and v2, will co-exist and in next patch v1 protocol will be removed.
> 
> There are several main differences between v1 and v2 protocols:
> - VFIO device state is now represented as a finite state machine instead
>   of a bitmap.
> 
> - Migration interface with kernel is now done using VFIO_DEVICE_FEATURE
>   ioctl and normal read() and write() instead of the migration region.
> 
> - VFIO migration protocol v2 currently doesn't support the pre-copy
>   phase of migration.
> 
> Detailed information about VFIO migration protocol v2 and difference
> compared to v1 can be found here [1].
> 
> [1]
> https://lore.kernel.org/all/20220224142024.147653-10-yish...@nvidia.com/
> 
> Signed-off-by: Avihai Horon 
> ---
>  hw/vfio/common.c  |  19 +-
>  hw/vfio/migration.c   | 386 ++
>  hw/vfio/trace-events  |   4 +
>  include/hw/vfio/vfio-common.h |   5 +
>  4 files changed, 375 insertions(+), 39 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 617e6cd901..0bdbd1586b 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -355,10 +355,18 @@ static bool 
> vfio_devices_all_dirty_tracking(VFIOContainer *container)
>  return false;
>  }
>  
> -if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) 
> &&
> +if (!migration->v2 &&
> +(vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) 
> &&
>  (migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING)) 
> {
>  return false;
>  }
> +
> +if (migration->v2 &&
> +(vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) 
> &&
> +(migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
> + migration->device_state == VFIO_DEVICE_STATE_RUNNING_P2P)) {
> +return false;
> +}
>  }
>  }
>  return true;
> @@ -385,7 +393,14 @@ static bool 
> vfio_devices_all_running_and_mig_active(VFIOContainer *container)
>  return false;
>  }
>  
> -if (migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING) {
> +if (!migration->v2 &&
> +migration->device_state_v1 & VFIO_DEVICE_STATE_V1_RUNNING) {
> +continue;
> +}
> +
> +if (migration->v2 &&
> +(migration->device_state == VFIO_DEVICE_STATE_RUNNING ||
> + migration->device_state == VFIO_DEVICE_STATE_RUNNING_P2P)) {
>  continue;
>  } else {
>  return false;
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index e784374453..62afc23a8c 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -44,8 +44,84 @@
>  #define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xef13ULL)
>  #define VFIO_MIG_FLAG_DEV_DATA_STATE(0xef14ULL)
>  
> +#define VFIO_MIG_DATA_BUFFER_SIZE (1024 * 1024)

Add comment explaining heuristic of this size.

> +
>  static int64_t bytes_transferred;
>  
> +static const char *mig_state_to_str(enum vfio_device_mig_state state)
> +{
> +switch (state) {
> +case VFIO_DEVICE_STATE_ERROR:
> +return "ERROR";
> +case VFIO_DEVICE_STATE_STOP:
> +return "STOP";
> +case VFIO_DEVICE_STATE_RUNNING:
> +return "RUNNING";
> +case VFIO_DEVICE_STATE_STOP_COPY:
> +return "STOP_COPY";
> +case VFIO_DEVICE_STATE_RESUMING:
> +return "RESUMING";
> +case VFIO_DEVICE_STATE_RUNNING_P2P:
> +return "RUNNING_P2P";
> +default:
> +return "UNKNOWN STATE";
> +}
> +}
> +
> +static int vfio_migration_set_state(VFIODevice *vbasedev,
> +enum vfio_device_mig_state new_state,
> +enum vfio_device_mig_state recover_state)
> +{
> +VFIOMigration *migration = vbasedev->migration;
> +uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
> +  sizeof(struct vfio_device_feature_mig_state),
> +  sizeof(uint64_t))] = {};
> +struct vfio_device_feature *feature = (void *)buf;
> +struct vfio_device_feature_mig_state *mig_state = (void *)feature->data;

We can cast to the actual types rather than void* here.

> +
> +feature->argsz = sizeof(buf);
> +feature->flags =
> +VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
> +mig_state->device_state = new_state;
> +if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
> +/* Try to set the device in some good state */
> +error_report(
> +"%s: Failed setting device state to %s, err: %s. Setting device 
> in recover state %s",
> + vbasedev->name, 

Re: [PATCH v9 3/8] KVM: Add KVM_EXIT_MEMORY_FAULT exit

2022-11-16 Thread Andy Lutomirski



On Tue, Oct 25, 2022, at 8:13 AM, Chao Peng wrote:
> This new KVM exit allows userspace to handle memory-related errors. It
> indicates an error happens in KVM at guest memory range [gpa, gpa+size).
> The flags includes additional information for userspace to handle the
> error. Currently bit 0 is defined as 'private memory' where '1'
> indicates error happens due to private memory access and '0' indicates
> error happens due to shared memory access.
>
> When private memory is enabled, this new exit will be used for KVM to
> exit to userspace for shared <-> private memory conversion in memory
> encryption usage. In such usage, typically there are two kind of memory
> conversions:
>   - explicit conversion: happens when guest explicitly calls into KVM
> to map a range (as private or shared), KVM then exits to userspace
> to perform the map/unmap operations.
>   - implicit conversion: happens in KVM page fault handler where KVM
> exits to userspace for an implicit conversion when the page is in a
> different state than requested (private or shared).
>
> Suggested-by: Sean Christopherson 
> Co-developed-by: Yu Zhang 
> Signed-off-by: Yu Zhang 
> Signed-off-by: Chao Peng 
> ---
>  Documentation/virt/kvm/api.rst | 23 +++
>  include/uapi/linux/kvm.h   |  9 +
>  2 files changed, 32 insertions(+)
>
> diff --git a/Documentation/virt/kvm/api.rst 
> b/Documentation/virt/kvm/api.rst
> index f3fa75649a78..975688912b8c 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -6537,6 +6537,29 @@ array field represents return values. The 
> userspace should update the return
>  values of SBI call before resuming the VCPU. For more details on 
> RISC-V SBI
>  spec refer, https://github.com/riscv/riscv-sbi-doc.
> 
> +::
> +
> + /* KVM_EXIT_MEMORY_FAULT */
> + struct {
> +  #define KVM_MEMORY_EXIT_FLAG_PRIVATE   (1 << 0)
> + __u32 flags;
> + __u32 padding;
> + __u64 gpa;
> + __u64 size;
> + } memory;
> +

Would it make sense to also have a field for the access type (read, write, 
execute, etc)?  I realize that shared <-> private conversion doesn't strictly 
need this, but it seems like it could be useful for logging failures and also 
for avoiding a second immediate fault if the type gets converted but doesn't 
have the right protection yet.

(Obviously, if this were changed, KVM would need the ability to report that it 
doesn't actually know the mode.)

--Andy



Re: [PATCH] tests/avocado: configure acpi-bits to use avocado timeout

2022-11-16 Thread John Snow
On Tue, Nov 15, 2022, 10:24 PM Ani Sinha  wrote:

> On Wed, Nov 16, 2022 at 2:58 AM John Snow  wrote:
> >
> > Instead of using a hardcoded timeout, just rely on Avocado's built-in
> > test case timeout. This helps avoid timeout issues on machines where 60
> > seconds is not sufficient.
> >
> > Signed-off-by: John Snow 
>
> Reviewed-by: Ani Sinha 
>

Alex's critique is valid, though: the way vm.wait() works is to
immediately  terminate the serial console connection as it prepares for the
VM to shut down. I forgot about this.

(For historical reasons, it does this to avoid deadlocks when the pipe
fills.)

I think we definitely do want to make sure we watch the console *while* we
wait for it to shut down, which is not a feature QEMUMachine really offers
right now in a meaningful way.

I need to make some more drastic changes to machine.py, but in the meantime
I can revise this patch to do something a bit smarter so we get console
logging while we wait. This is a use case worth supporting.

(Thanks for writing new and interesting tests!)


> > ---
> >  tests/avocado/acpi-bits.py | 10 ++
> >  1 file changed, 2 insertions(+), 8 deletions(-)
> >
> > diff --git a/tests/avocado/acpi-bits.py b/tests/avocado/acpi-bits.py
> > index 8745a58a766..ac13e22dc93 100644
> > --- a/tests/avocado/acpi-bits.py
> > +++ b/tests/avocado/acpi-bits.py
> > @@ -385,12 +385,6 @@ def test_acpi_smbios_bits(self):
> >  self._vm.launch()
> >  # biosbits has been configured to run all the specified test
> suites
> >  # in batch mode and then automatically initiate a vm shutdown.
> > -# sleep for maximum of one minute
> > -max_sleep_time = time.monotonic() + 60
> > -while self._vm.is_running() and time.monotonic() <
> max_sleep_time:
> > -time.sleep(1)
> > -
> > -self.assertFalse(time.monotonic() > max_sleep_time,
> > - 'The VM seems to have failed to shutdown in
> time')
> > -
> > +# Rely on avocado's unit test timeout.
> > +self._vm.wait(timeout=None)
>
> I think this is fine. This just waits until the VM is shutdown on its
> own and relies on the avocado framework to timeout if it doesn't. We
> do not need to look into the console. The test issues a shutdown from
> the VM itself once its done with the batch operations.


Still, if it fails, we want to see the output, right? It's very frustrating
if it doesn't, especially in an automated pipeline.


> >  self.parse_log()
> > --
> > 2.37.3
> >
>
>


[PATCH] ci: replace x86_64 macos-11 with aarch64 macos-12

2022-11-16 Thread Daniel P . Berrangé
The Cirrus CI service has announced the intent to discontinue
support for x86_64 macOS CI runners. They already have aarch64
runners available and require all projects to switch to these
images before Jan 1st 2023. The different architecture is
merely determined by the image name requested.

For aarch64 they only support macOS 12 onwards. At the same
time our support policy only guarantees the most recent 2
major versions, so macOS 12 is already technically our min
version.

https://cirrus-ci.org/blog/2022/11/08/sunsetting-intel-macos-instances/
Signed-off-by: Daniel P. Berrangé 
---
 .gitlab-ci.d/cirrus.yml  | 12 ++--
 .gitlab-ci.d/cirrus/{macos-11.vars => macos-12.vars} | 12 ++--
 tests/lcitool/libvirt-ci |  2 +-
 tests/lcitool/refresh|  2 +-
 4 files changed, 14 insertions(+), 14 deletions(-)
 rename .gitlab-ci.d/cirrus/{macos-11.vars => macos-12.vars} (74%)

diff --git a/.gitlab-ci.d/cirrus.yml b/.gitlab-ci.d/cirrus.yml
index d70da61248..634a73a742 100644
--- a/.gitlab-ci.d/cirrus.yml
+++ b/.gitlab-ci.d/cirrus.yml
@@ -70,19 +70,19 @@ x64-freebsd-13-build:
 INSTALL_COMMAND: pkg install -y
 TEST_TARGETS: check
 
-x64-macos-11-base-build:
+aarch64-macos-12-base-build:
   extends: .cirrus_build_job
   variables:
-NAME: macos-11
-CIRRUS_VM_INSTANCE_TYPE: osx_instance
+NAME: macos-12
+CIRRUS_VM_INSTANCE_TYPE: macos_instance
 CIRRUS_VM_IMAGE_SELECTOR: image
-CIRRUS_VM_IMAGE_NAME: big-sur-base
+CIRRUS_VM_IMAGE_NAME: ghcr.io/cirruslabs/macos-monterey-base:latest
 CIRRUS_VM_CPUS: 12
 CIRRUS_VM_RAM: 24G
 UPDATE_COMMAND: brew update
 INSTALL_COMMAND: brew install
-PATH_EXTRA: /usr/local/opt/ccache/libexec:/usr/local/opt/gettext/bin
-PKG_CONFIG_PATH: 
/usr/local/opt/curl/lib/pkgconfig:/usr/local/opt/ncurses/lib/pkgconfig:/usr/local/opt/readline/lib/pkgconfig
+PATH_EXTRA: /opt/homebrew/ccache/libexec:/opt/homebrew/gettext/bin
+PKG_CONFIG_PATH: 
/opt/homebrew/curl/lib/pkgconfig:/opt/homebrew/ncurses/lib/pkgconfig:/opt/homebrew/readline/lib/pkgconfig
 TEST_TARGETS: check-unit check-block check-qapi-schema check-softfloat 
check-qtest-x86_64
 
 
diff --git a/.gitlab-ci.d/cirrus/macos-11.vars 
b/.gitlab-ci.d/cirrus/macos-12.vars
similarity index 74%
rename from .gitlab-ci.d/cirrus/macos-11.vars
rename to .gitlab-ci.d/cirrus/macos-12.vars
index aee9f50de6..ef9e14b373 100644
--- a/.gitlab-ci.d/cirrus/macos-11.vars
+++ b/.gitlab-ci.d/cirrus/macos-12.vars
@@ -1,16 +1,16 @@
 # THIS FILE WAS AUTO-GENERATED
 #
-#  $ lcitool variables macos-11 qemu
+#  $ lcitool variables macos-12 qemu
 #
 # https://gitlab.com/libvirt/libvirt-ci
 
-CCACHE='/usr/local/bin/ccache'
+CCACHE='/opt/homebrew/bin/ccache'
 CPAN_PKGS=''
 CROSS_PKGS=''
-MAKE='/usr/local/bin/gmake'
-NINJA='/usr/local/bin/ninja'
+MAKE='/opt/homebrew/bin/gmake'
+NINJA='/opt/homebrew/bin/ninja'
 PACKAGING_COMMAND='brew'
-PIP3='/usr/local/bin/pip3'
+PIP3='/opt/homebrew/bin/pip3'
 PKGS='bash bc bison bzip2 capstone ccache cmocka ctags curl dbus diffutils dtc 
flex gcovr gettext git glib gnu-sed gnutls gtk+3 jemalloc jpeg-turbo json-c 
libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 
libusb llvm lzo make meson ncurses nettle ninja perl pixman pkg-config python3 
rpm2cpio sdl2 sdl2_image snappy sparse spice-protocol tesseract texinfo 
usbredir vde vte3 zlib zstd'
 PYPI_PKGS='PyYAML numpy pillow sphinx sphinx-rtd-theme'
-PYTHON='/usr/local/bin/python3'
+PYTHON='/opt/homebrew/bin/python3'
diff --git a/tests/lcitool/libvirt-ci b/tests/lcitool/libvirt-ci
index d40e203631..e3eb28cf2e 160000
--- a/tests/lcitool/libvirt-ci
+++ b/tests/lcitool/libvirt-ci
@@ -1 +1 @@
-Subproject commit d40e203631eb3eacee17e8cf8fd20aa5152db62a
+Subproject commit e3eb28cf2e17fbcf7fe7e19505ee432b8ec5bbb5
diff --git a/tests/lcitool/refresh b/tests/lcitool/refresh
index ce0b24c0b1..fa966e4009 100755
--- a/tests/lcitool/refresh
+++ b/tests/lcitool/refresh
@@ -176,7 +176,7 @@ try:
 #
 generate_cirrus("freebsd-12")
 generate_cirrus("freebsd-13")
-generate_cirrus("macos-11")
+generate_cirrus("macos-12")
 
 sys.exit(0)
 except Exception as ex:
-- 
2.38.1




[PATCH v2 1/3] arm: move KVM breakpoints helpers

2022-11-16 Thread francesco . cagnin
From: Francesco Cagnin 

These helpers will be also used for HVF. Aside from reformatting a
couple of comments for 'checkpatch.pl' and updating meson to compile
'hyp_gdbstub.c', this is just code motion.

Signed-off-by: Francesco Cagnin 
---
 target/arm/hyp_gdbstub.c | 242 ++
 target/arm/internals.h   |  50 +++
 target/arm/kvm64.c   | 276 ---
 target/arm/meson.build   |   3 +-
 4 files changed, 294 insertions(+), 277 deletions(-)
 create mode 100644 target/arm/hyp_gdbstub.c

diff --git a/target/arm/hyp_gdbstub.c b/target/arm/hyp_gdbstub.c
new file mode 100644
index 0000000000..22b2b7de7b
--- /dev/null
+++ b/target/arm/hyp_gdbstub.c
@@ -0,0 +1,242 @@
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internals.h"
+#include "exec/gdbstub.h"
+
+/* Maximum and current break/watch point counts */
+int max_hw_bps, max_hw_wps;
+GArray *hw_breakpoints, *hw_watchpoints;
+
+/**
+ * insert_hw_breakpoint()
+ * @addr: address of breakpoint
+ *
+ * See ARM ARM D2.9.1 for details but here we are only going to create
+ * simple un-linked breakpoints (i.e. we don't chain breakpoints
+ * together to match address and context or vmid). The hardware is
+ * capable of fancier matching but that will require exposing that
+ * fanciness to GDB's interface
+ *
+ * DBGBCR_EL1, Debug Breakpoint Control Registers
+ *
+ *  31  24 23  20 19   16 15 14  13  12   9 8   5 4    3 2   1  0
+ * +------+------+-------+-----+----+------+-----+------+-----+---+
+ * | RES0 |  BT  |  LBN  | SSC | HMC| RES0 | BAS | RES0 | PMC | E |
+ * +------+------+-------+-----+----+------+-----+------+-----+---+
+ *
+ * BT: Breakpoint type (0 = unlinked address match)
+ * LBN: Linked BP number (0 = unused)
+ * SSC/HMC/PMC: Security, Higher and Priv access control (Table D-12)
+ * BAS: Byte Address Select (RES1 for AArch64)
+ * E: Enable bit
+ *
+ * DBGBVR_EL1, Debug Breakpoint Value Registers
+ *
+ *  63  53 52       49 48       2  1 0
+ * +------+-----------+----------+-----+
+ * | RESS | VA[52:49] | VA[48:2] | 0 0 |
+ * +------+-----------+----------+-----+
+ *
+ * Depending on the addressing mode bits the top bits of the register
+ * are a sign extension of the highest applicable VA bit. Some
+ * versions of GDB don't do it correctly so we ensure they are correct
+ * here so future PC comparisons will work properly.
+ */
+
+int insert_hw_breakpoint(target_ulong addr)
+{
+HWBreakpoint brk = {
+.bcr = 0x1, /* BCR E=1, enable */
+.bvr = sextract64(addr, 0, 53)
+};
+
+if (cur_hw_bps >= max_hw_bps) {
+return -ENOBUFS;
+}
+
+brk.bcr = deposit32(brk.bcr, 1, 2, 0x3);   /* PMC = 11 */
+brk.bcr = deposit32(brk.bcr, 5, 4, 0xf);   /* BAS = RES1 */
+
+g_array_append_val(hw_breakpoints, brk);
+
+return 0;
+}
+
+/**
+ * delete_hw_breakpoint()
+ * @pc: address of breakpoint
+ *
+ * Delete a breakpoint and shuffle any above down
+ */
+
+int delete_hw_breakpoint(target_ulong pc)
+{
+int i;
+for (i = 0; i < hw_breakpoints->len; i++) {
+HWBreakpoint *brk = get_hw_bp(i);
+if (brk->bvr == pc) {
+g_array_remove_index(hw_breakpoints, i);
+return 0;
+}
+}
+return -ENOENT;
+}
+
+/**
+ * insert_hw_watchpoint()
+ * @addr: address of watch point
+ * @len: size of area
+ * @type: type of watch point
+ *
+ * See ARM ARM D2.10. As with the breakpoints we can do some advanced
+ * stuff if we want to. The watch points can be linked with the break
+ * points above to make them context aware. However for simplicity
+ * currently we only deal with simple read/write watch points.
+ *
+ * D7.3.11 DBGWCR_EL1, Debug Watchpoint Control Registers
+ *
+ *  31  29 28   24 23  21  20  19 16 15 14  13   12  5 4   3 2   1  0
+ * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
+ * | RES0 |  MASK | RES0 | WT | LBN | SSC | HMC | BAS | LSC | PAC | E |
+ * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
+ *
+ * MASK: num bits addr mask (0=none,01/10=res,11=3 bits (8 bytes))
+ * WT: 0 - unlinked, 1 - linked (not currently used)
+ * LBN: Linked BP number (not currently used)
+ * SSC/HMC/PAC: Security, Higher and Priv access control (Table D2-11)
+ * BAS: Byte Address Select
+ * LSC: Load/Store control (01: load, 10: store, 11: both)
+ * E: Enable
+ *
+ * The bottom 2 bits of the value register are masked. Therefore to
+ * break on any sizes smaller than an unaligned word you need to set
+ * MASK=0, BAS=bit per byte in question. For larger regions (^2) you
+ * need to ensure you mask the address as required and set BAS=0xff
+ */
+
+int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type)
+{
+HWWatchpoint wp = {
+.wcr = R_DBGWCR_E_MASK, /* E=1, enable */
+.wvr = addr & (~0x7ULL),
+.details = { .vaddr = addr, .len = len }
+};
+
+if (cur_hw_wps >= max_hw_wps) {
+return -ENOBUFS;
+}
+
+

[PATCH v2 2/3] hvf: implement guest debugging on Apple Silicon hosts

2022-11-16 Thread francesco . cagnin
From: Francesco Cagnin 

Support is added for single-stepping, software breakpoints, hardware
breakpoints and watchpoints. The code has been structured like the KVM
counterpart (and many parts are basically identical).

Guests can be debugged through the gdbstub.

Signed-off-by: Francesco Cagnin 
---
 accel/hvf/hvf-accel-ops.c | 123 
 accel/hvf/hvf-all.c   |  24 +
 cpu.c |   3 +
 include/sysemu/hvf.h  |  29 ++
 include/sysemu/hvf_int.h  |   1 +
 target/arm/hvf/hvf.c  | 194 +-
 6 files changed, 372 insertions(+), 2 deletions(-)

diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
index 24913ca9c4..1ce0f94a64 100644
--- a/accel/hvf/hvf-accel-ops.c
+++ b/accel/hvf/hvf-accel-ops.c
@@ -52,6 +52,7 @@
 #include "qemu/main-loop.h"
 #include "exec/address-spaces.h"
 #include "exec/exec-all.h"
+#include "exec/gdbstub.h"
 #include "sysemu/cpus.h"
 #include "sysemu/hvf.h"
 #include "sysemu/hvf_int.h"
@@ -340,12 +341,18 @@ static int hvf_accel_init(MachineState *ms)
 return hvf_arch_init();
 }
 
+static int hvf_gdbstub_sstep_flags(void)
+{
+return SSTEP_ENABLE | SSTEP_NOIRQ;
+}
+
 static void hvf_accel_class_init(ObjectClass *oc, void *data)
 {
 AccelClass *ac = ACCEL_CLASS(oc);
 ac->name = "HVF";
 ac->init_machine = hvf_accel_init;
 ac->allowed = _allowed;
+ac->gdbstub_supported_sstep_flags = hvf_gdbstub_sstep_flags;
 }
 
 static const TypeInfo hvf_accel_type = {
@@ -462,6 +469,117 @@ static void hvf_start_vcpu_thread(CPUState *cpu)
cpu, QEMU_THREAD_JOINABLE);
 }
 
+static bool hvf_supports_guest_debug(void)
+{
+#ifdef TARGET_AARCH64
+return true;
+#else
+return false;
+#endif
+}
+
+static int hvf_insert_breakpoint(CPUState *cpu, int type, hwaddr addr, hwaddr 
len)
+{
+struct hvf_sw_breakpoint *bp;
+int err;
+
+if (type == GDB_BREAKPOINT_SW) {
+bp = hvf_find_sw_breakpoint(cpu, addr);
+if (bp) {
+bp->use_count++;
+return 0;
+}
+
+bp = g_new(struct hvf_sw_breakpoint, 1);
+bp->pc = addr;
+bp->use_count = 1;
+err = hvf_arch_insert_sw_breakpoint(cpu, bp);
+if (err) {
+g_free(bp);
+return err;
+}
+
+QTAILQ_INSERT_HEAD(>hvf->hvf_sw_breakpoints, bp, entry);
+} else {
+err = hvf_arch_insert_hw_breakpoint(addr, len, type);
+if (err) {
+return err;
+}
+}
+
+CPU_FOREACH(cpu) {
+err = hvf_update_guest_debug(cpu);
+if (err) {
+return err;
+}
+}
+return 0;
+}
+
+static int hvf_remove_breakpoint(CPUState *cpu, int type, hwaddr addr, hwaddr 
len)
+{
+struct hvf_sw_breakpoint *bp;
+int err;
+
+if (type == GDB_BREAKPOINT_SW) {
+bp = hvf_find_sw_breakpoint(cpu, addr);
+if (!bp) {
+return -ENOENT;
+}
+
+if (bp->use_count > 1) {
+bp->use_count--;
+return 0;
+}
+
+err = hvf_arch_remove_sw_breakpoint(cpu, bp);
+if (err) {
+return err;
+}
+
+QTAILQ_REMOVE(>hvf->hvf_sw_breakpoints, bp, entry);
+g_free(bp);
+} else {
+err = hvf_arch_remove_hw_breakpoint(addr, len, type);
+if (err) {
+return err;
+}
+}
+
+CPU_FOREACH(cpu) {
+err = hvf_update_guest_debug(cpu);
+if (err) {
+return err;
+}
+}
+return 0;
+}
+
+static void hvf_remove_all_breakpoints(CPUState *cpu)
+{
+struct hvf_sw_breakpoint *bp, *next;
+CPUState *tmpcpu;
+
+QTAILQ_FOREACH_SAFE(bp, >hvf->hvf_sw_breakpoints, entry, next) {
+if (hvf_arch_remove_sw_breakpoint(cpu, bp) != 0) {
+/* Try harder to find a CPU that currently sees the breakpoint. */
+CPU_FOREACH(tmpcpu)
+{
+if (hvf_arch_remove_sw_breakpoint(tmpcpu, bp) == 0) {
+break;
+}
+}
+}
+QTAILQ_REMOVE(>hvf->hvf_sw_breakpoints, bp, entry);
+g_free(bp);
+}
+hvf_arch_remove_all_hw_breakpoints();
+
+CPU_FOREACH(cpu) {
+hvf_update_guest_debug(cpu);
+}
+}
+
 static void hvf_accel_ops_class_init(ObjectClass *oc, void *data)
 {
 AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
@@ -473,6 +591,11 @@ static void hvf_accel_ops_class_init(ObjectClass *oc, void 
*data)
 ops->synchronize_post_init = hvf_cpu_synchronize_post_init;
 ops->synchronize_state = hvf_cpu_synchronize_state;
 ops->synchronize_pre_loadvm = hvf_cpu_synchronize_pre_loadvm;
+
+ops->supports_guest_debug = hvf_supports_guest_debug;
+ops->insert_breakpoint = hvf_insert_breakpoint;
+ops->remove_breakpoint = hvf_remove_breakpoint;
+ops->remove_all_breakpoints = hvf_remove_all_breakpoints;
 };
 static const TypeInfo hvf_accel_ops_type = {
 .name = 

[PATCH v2 0/3] Add gdbstub support to HVF

2022-11-16 Thread francesco . cagnin
From: Francesco Cagnin 

This patch series aims to add gdbstub support to HVF (the 'QEMU
accelerator on macOS that employs Hypervisor.framework') on Apple
Silicon hosts.

The proposed implementation, structured like the KVM counterpart,
handles single-stepping, software breakpoints, hardware breakpoints and
hardware watchpoints.

The patch has been most recently tested working on macOS Ventura 13.0
hosts and Linux kernel 5.19 guests with the test script
'tests/guest-debug/test-gdbstub.py' (slightly updated to make it work
with Linux kernels compiled on macOS).

If deemed useful, I can also submit an analogous patch targeting Intel
hosts.

v2:
* Move debug helpers to 'target/arm/hyp_gdbstub.c'
* Add support for SSTEP_NOIRQ and multi-core (thanks Mads Ynddal)
* Move calls to 'hv_vcpu_set_trap_debug_exceptions()' to
  'hvf_arch_update_guest_debug()'
* Use 'arm_num_brps()' and 'arm_num_wrps()' to compute the number of
  breakpoints and watchpoints available (thanks Peter Maydell)

Francesco Cagnin (3):
  arm: move KVM breakpoints helpers
  hvf: implement guest debugging on Apple Silicon hosts
  hvf: handle writes of MDSCR_EL1 and DBG*_EL1

 accel/hvf/hvf-accel-ops.c | 123 ++
 accel/hvf/hvf-all.c   |  24 +++
 cpu.c |   3 +
 include/sysemu/hvf.h  |  29 
 include/sysemu/hvf_int.h  |   1 +
 target/arm/hvf/hvf.c  | 334 +-
 target/arm/hyp_gdbstub.c  | 242 +++
 target/arm/internals.h|  50 ++
 target/arm/kvm64.c| 276 ---
 target/arm/meson.build|   3 +-
 10 files changed, 806 insertions(+), 279 deletions(-)
 create mode 100644 target/arm/hyp_gdbstub.c

-- 
2.38.1




[PATCH v2 3/3] hvf: handle writes of MDSCR_EL1 and DBG*_EL1

2022-11-16 Thread francesco . cagnin
From: Francesco Cagnin 

This proved to be required when debugging the Linux kernel's initial
code, as the Hypervisor framework was triggering 'EC_SYSTEMREGISTERTRAP'
VM exits after enabling trap exceptions with
'hv_vcpu_set_trap_debug_exceptions()'.

Signed-off-by: Francesco Cagnin 
---
 target/arm/hvf/hvf.c | 140 +++
 1 file changed, 140 insertions(+)

diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 66fc82e9b9..c28c3dbdaa 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -95,6 +95,71 @@ static void hvf_arm_init_debug(CPUState *cpu)
 #define SYSREG_PMCEID1_EL0SYSREG(3, 3, 9, 12, 7)
 #define SYSREG_PMCCNTR_EL0SYSREG(3, 3, 9, 13, 0)
 #define SYSREG_PMCCFILTR_EL0  SYSREG(3, 3, 14, 15, 7)
+#define SYSREG_MDSCR_EL1  SYSREG(2, 0, 0, 2, 2)
+#define SYSREG_DBGBVR0_EL1SYSREG(2, 0, 0, 0, 4)
+#define SYSREG_DBGBCR0_EL1SYSREG(2, 0, 0, 0, 5)
+#define SYSREG_DBGWVR0_EL1SYSREG(2, 0, 0, 0, 6)
+#define SYSREG_DBGWCR0_EL1SYSREG(2, 0, 0, 0, 7)
+#define SYSREG_DBGBVR1_EL1SYSREG(2, 0, 0, 1, 4)
+#define SYSREG_DBGBCR1_EL1SYSREG(2, 0, 0, 1, 5)
+#define SYSREG_DBGWVR1_EL1SYSREG(2, 0, 0, 1, 6)
+#define SYSREG_DBGWCR1_EL1SYSREG(2, 0, 0, 1, 7)
+#define SYSREG_DBGBVR2_EL1SYSREG(2, 0, 0, 2, 4)
+#define SYSREG_DBGBCR2_EL1SYSREG(2, 0, 0, 2, 5)
+#define SYSREG_DBGWVR2_EL1SYSREG(2, 0, 0, 2, 6)
+#define SYSREG_DBGWCR2_EL1SYSREG(2, 0, 0, 2, 7)
+#define SYSREG_DBGBVR3_EL1SYSREG(2, 0, 0, 3, 4)
+#define SYSREG_DBGBCR3_EL1SYSREG(2, 0, 0, 3, 5)
+#define SYSREG_DBGWVR3_EL1SYSREG(2, 0, 0, 3, 6)
+#define SYSREG_DBGWCR3_EL1SYSREG(2, 0, 0, 3, 7)
+#define SYSREG_DBGBVR4_EL1SYSREG(2, 0, 0, 4, 4)
+#define SYSREG_DBGBCR4_EL1SYSREG(2, 0, 0, 4, 5)
+#define SYSREG_DBGWVR4_EL1SYSREG(2, 0, 0, 4, 6)
+#define SYSREG_DBGWCR4_EL1SYSREG(2, 0, 0, 4, 7)
+#define SYSREG_DBGBVR5_EL1SYSREG(2, 0, 0, 5, 4)
+#define SYSREG_DBGBCR5_EL1SYSREG(2, 0, 0, 5, 5)
+#define SYSREG_DBGWVR5_EL1SYSREG(2, 0, 0, 5, 6)
+#define SYSREG_DBGWCR5_EL1SYSREG(2, 0, 0, 5, 7)
+#define SYSREG_DBGBVR6_EL1SYSREG(2, 0, 0, 6, 4)
+#define SYSREG_DBGBCR6_EL1SYSREG(2, 0, 0, 6, 5)
+#define SYSREG_DBGWVR6_EL1SYSREG(2, 0, 0, 6, 6)
+#define SYSREG_DBGWCR6_EL1SYSREG(2, 0, 0, 6, 7)
+#define SYSREG_DBGBVR7_EL1SYSREG(2, 0, 0, 7, 4)
+#define SYSREG_DBGBCR7_EL1SYSREG(2, 0, 0, 7, 5)
+#define SYSREG_DBGWVR7_EL1SYSREG(2, 0, 0, 7, 6)
+#define SYSREG_DBGWCR7_EL1SYSREG(2, 0, 0, 7, 7)
+#define SYSREG_DBGBVR8_EL1SYSREG(2, 0, 0, 8, 4)
+#define SYSREG_DBGBCR8_EL1SYSREG(2, 0, 0, 8, 5)
+#define SYSREG_DBGWVR8_EL1SYSREG(2, 0, 0, 8, 6)
+#define SYSREG_DBGWCR8_EL1SYSREG(2, 0, 0, 8, 7)
+#define SYSREG_DBGBVR9_EL1SYSREG(2, 0, 0, 9, 4)
+#define SYSREG_DBGBCR9_EL1SYSREG(2, 0, 0, 9, 5)
+#define SYSREG_DBGWVR9_EL1SYSREG(2, 0, 0, 9, 6)
+#define SYSREG_DBGWCR9_EL1SYSREG(2, 0, 0, 9, 7)
+#define SYSREG_DBGBVR10_EL1   SYSREG(2, 0, 0, 10, 4)
+#define SYSREG_DBGBCR10_EL1   SYSREG(2, 0, 0, 10, 5)
+#define SYSREG_DBGWVR10_EL1   SYSREG(2, 0, 0, 10, 6)
+#define SYSREG_DBGWCR10_EL1   SYSREG(2, 0, 0, 10, 7)
+#define SYSREG_DBGBVR11_EL1   SYSREG(2, 0, 0, 11, 4)
+#define SYSREG_DBGBCR11_EL1   SYSREG(2, 0, 0, 11, 5)
+#define SYSREG_DBGWVR11_EL1   SYSREG(2, 0, 0, 11, 6)
+#define SYSREG_DBGWCR11_EL1   SYSREG(2, 0, 0, 11, 7)
+#define SYSREG_DBGBVR12_EL1   SYSREG(2, 0, 0, 12, 4)
+#define SYSREG_DBGBCR12_EL1   SYSREG(2, 0, 0, 12, 5)
+#define SYSREG_DBGWVR12_EL1   SYSREG(2, 0, 0, 12, 6)
+#define SYSREG_DBGWCR12_EL1   SYSREG(2, 0, 0, 12, 7)
+#define SYSREG_DBGBVR13_EL1   SYSREG(2, 0, 0, 13, 4)
+#define SYSREG_DBGBCR13_EL1   SYSREG(2, 0, 0, 13, 5)
+#define SYSREG_DBGWVR13_EL1   SYSREG(2, 0, 0, 13, 6)
+#define SYSREG_DBGWCR13_EL1   SYSREG(2, 0, 0, 13, 7)
+#define SYSREG_DBGBVR14_EL1   SYSREG(2, 0, 0, 14, 4)
+#define SYSREG_DBGBCR14_EL1   SYSREG(2, 0, 0, 14, 5)
+#define SYSREG_DBGWVR14_EL1   SYSREG(2, 0, 0, 14, 6)
+#define SYSREG_DBGWCR14_EL1   SYSREG(2, 0, 0, 14, 7)
+#define SYSREG_DBGBVR15_EL1   SYSREG(2, 0, 0, 15, 4)
+#define SYSREG_DBGBCR15_EL1   SYSREG(2, 0, 0, 15, 5)
+#define SYSREG_DBGWVR15_EL1   SYSREG(2, 0, 0, 15, 6)
+#define SYSREG_DBGWCR15_EL1   SYSREG(2, 0, 0, 15, 7)
 
 #define WFX_IS_WFE (1 << 0)
 
@@ -1039,6 +1104,81 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, 
uint64_t val)
 case SYSREG_OSDLR_EL1:
 /* Dummy register */
 break;
+case SYSREG_MDSCR_EL1:
+env->cp15.mdscr_el1 = val;
+break;
+case SYSREG_DBGBVR0_EL1:
+case SYSREG_DBGBVR1_EL1:
+case SYSREG_DBGBVR2_EL1:
+case SYSREG_DBGBVR3_EL1:
+case SYSREG_DBGBVR4_EL1:
+case SYSREG_DBGBVR5_EL1:
+case SYSREG_DBGBVR6_EL1:
+case SYSREG_DBGBVR7_EL1:
+case SYSREG_DBGBVR8_EL1:
+case SYSREG_DBGBVR9_EL1:
+case SYSREG_DBGBVR10_EL1:
+case SYSREG_DBGBVR11_EL1:
+case SYSREG_DBGBVR12_EL1:
+case SYSREG_DBGBVR13_EL1:
+case SYSREG_DBGBVR14_EL1:
+case 

[PATCH v3 0/2] Add OCP extended log to nvme QEMU

2022-11-16 Thread Joel Granados
The motivation and description are contained in the last patch in this set.
Will copy paste it here for convenience:

In order to evaluate write amplification factor (WAF) within the storage
stack it is important to know the number of bytes written to the
controller. The existing SMART log value of Data Units Written is too
coarse (given in units of 500 Kb) and so we add the SMART health
information extended from the OCP specification (given in units of bytes).

To accommodate different vendor specific specifications like OCP, we add a
multiplexing function (nvme_vendor_specific_log) which will route to the
different log functions based on arguments and log ids. We only return the
OCP extended smart log when the command is 0xC0 and ocp has been turned on
in the args.

Though we add the whole nvme smart log extended structure, we only populate
the physical_media_units_{read,written}, log_page_version and
log_page_uuid.

V3 changes:
1. Corrected a bunch of checkpatch issues. Since I changed the first patch
   I did not include the reviewed-by.
2. Included some documentation in nvme.rst for the ocp argument
3. Squashed the ocp arg changes into the main patch.
4. Fixed several comments and an open parenthesis
5. Hex values are now in lower case.
6. Change the reserved format to rsvd
7. Made sure that NvmeCtrl is the first arg in all the functions.
8. Fixed comment on commit of main patch

V2 changes:
1. I moved the ocp parameter from the namespace to the subsystem as it is
   defined there in the OCP specification
2. I now accumulate statistics from all namespaces and report them back on
   the extended log as per the spec.
3. I removed the default case in the switch in nvme_vendor_specific_log as
   it does not have any special function.

Joel Granados (2):
  nvme: Move adjustment of data_units{read,written}
  nvme: Add physical writes/reads from OCP log

 docs/system/devices/nvme.rst |  7 
 hw/nvme/ctrl.c   | 69 
 hw/nvme/nvme.h   |  1 +
 include/block/nvme.h | 36 +++
 4 files changed, 107 insertions(+), 6 deletions(-)

-- 
2.30.2




[PATCH v3 2/2] nvme: Add physical writes/reads from OCP log

2022-11-16 Thread Joel Granados
In order to evaluate write amplification factor (WAF) within the storage
stack it is important to know the number of bytes written to the
controller. The existing SMART log value of Data Units Written is too
coarse (given in units of 500 Kb) and so we add the SMART health
information extended from the OCP specification (given in units of bytes)

We add a controller argument (ocp) that toggles on/off the SMART log
extended structure.  To accommodate different vendor specific specifications
like OCP, we add a multiplexing function (nvme_vendor_specific_log) which
will route to the different log functions based on arguments and log ids.
We only return the OCP extended SMART log when the command is 0xC0 and ocp
has been turned on in the args.

Though we add the whole nvme SMART log extended structure, we only populate
the physical_media_units_{read,written}, log_page_version and
log_page_uuid.

Signed-off-by: Joel Granados 
---
 docs/system/devices/nvme.rst |  7 +
 hw/nvme/ctrl.c   | 55 
 hw/nvme/nvme.h   |  1 +
 include/block/nvme.h | 36 +++
 4 files changed, 99 insertions(+)

diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst
index 30f841ef62..1cc5e52c00 100644
--- a/docs/system/devices/nvme.rst
+++ b/docs/system/devices/nvme.rst
@@ -53,6 +53,13 @@ parameters.
   Vendor ID. Set this to ``on`` to revert to the unallocated Intel ID
   previously used.
 
+``ocp`` (default: ``off``)
+  The Open Compute Project defines the Datacenter NVMe SSD Specification that
+  sits on top of NVMe. It describes additional commands and NVMe behaviors
+  specific for the Datacenter. When this option is ``on`` OCP features such as
+  the SMART / Health information extended log become available in the
+  controller.
+
 Additional Namespaces
 -
 
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index bf291f7ffe..c7215a4ed1 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -4455,6 +4455,41 @@ static void nvme_set_blk_stats(NvmeNamespace *ns, struct 
nvme_stats *stats)
 stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE];
 }
 
+static uint16_t nvme_ocp_extended_smart_info(NvmeCtrl *n, uint8_t rae,
+ uint32_t buf_len, uint64_t off,
+ NvmeRequest *req)
+{
+NvmeNamespace *ns = NULL;
+NvmeSmartLogExtended smart_l = { 0 };
+struct nvme_stats stats = { 0 };
+uint32_t trans_len;
+
+if (off >= sizeof(smart_l)) {
+return NVME_INVALID_FIELD | NVME_DNR;
+}
+
+/* accumulate all stats from all namespaces */
+for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+ns = nvme_ns(n, i);
+if (ns) {
+nvme_set_blk_stats(ns, );
+}
+}
+
+smart_l.physical_media_units_written[0] = cpu_to_le32(stats.units_written);
+smart_l.physical_media_units_read[0] = cpu_to_le32(stats.units_read);
+smart_l.log_page_version = 0x0003;
+smart_l.log_page_uuid[0] = 0xA4F2BFEA2810AFC5;
+smart_l.log_page_uuid[1] = 0xAFD514C97C6F4F9C;
+
+if (!rae) {
+nvme_clear_events(n, NVME_AER_TYPE_SMART);
+}
+
+trans_len = MIN(sizeof(smart_l) - off, buf_len);
+return nvme_c2h(n, (uint8_t *) _l + off, trans_len, req);
+}
+
 static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
 uint64_t off, NvmeRequest *req)
 {
@@ -4642,6 +4677,23 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t 
csi, uint32_t buf_len,
 return nvme_c2h(n, ((uint8_t *)) + off, trans_len, req);
 }
 
+static uint16_t nvme_vendor_specific_log(NvmeCtrl *n, uint8_t rae,
+ uint32_t buf_len, uint64_t off,
+ NvmeRequest *req, uint8_t lid)
+{
+switch (lid) {
+case 0xc0:
+if (n->params.ocp) {
+return nvme_ocp_extended_smart_info(n, rae, buf_len, off, req);
+}
+break;
+/* add a case for each additional vendor specific log id */
+}
+
+trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
+return NVME_INVALID_FIELD | NVME_DNR;
+}
+
 static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
 {
 NvmeCmd *cmd = >cmd;
@@ -4683,6 +4735,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest 
*req)
 return nvme_error_info(n, rae, len, off, req);
 case NVME_LOG_SMART_INFO:
 return nvme_smart_info(n, rae, len, off, req);
+case NVME_LOG_VENDOR_START...NVME_LOG_VENDOR_END:
+return nvme_vendor_specific_log(n, rae, len, off, req, lid);
 case NVME_LOG_FW_SLOT_INFO:
 return nvme_fw_log_info(n, len, off, req);
 case NVME_LOG_CHANGED_NSLIST:
@@ -7685,6 +7739,7 @@ static Property nvme_props[] = {
   params.sriov_max_vi_per_vf, 0),
 DEFINE_PROP_UINT8("sriov_max_vq_per_vf", NvmeCtrl,
   

[PATCH v3 1/2] nvme: Move adjustment of data_units{read,written}

2022-11-16 Thread Joel Granados
In order to return the units_{read/written} required by the SMART log we
need to shift the number of bytes right by BDRV_SECTOR_BITS and divide
by 1000. This is a prep patch that moves this adjustment to where the SMART
log is calculated in order to use the stats struct for calculating OCP
extended smart log values.

Signed-off-by: Joel Granados 
---
 hw/nvme/ctrl.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 87aeba0564..bf291f7ffe 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -4449,8 +4449,8 @@ static void nvme_set_blk_stats(NvmeNamespace *ns, struct 
nvme_stats *stats)
 {
 BlockAcctStats *s = blk_get_stats(ns->blkconf.blk);
 
-stats->units_read += s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
-stats->units_written += s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
+stats->units_read += s->nr_bytes[BLOCK_ACCT_READ];
+stats->units_written += s->nr_bytes[BLOCK_ACCT_WRITE];
 stats->read_commands += s->nr_ops[BLOCK_ACCT_READ];
 stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE];
 }
@@ -4464,6 +4464,7 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, 
uint32_t buf_len,
 uint32_t trans_len;
 NvmeNamespace *ns;
 time_t current_ms;
+uint64_t u_read, u_written;
 
 if (off >= sizeof(smart)) {
 return NVME_INVALID_FIELD | NVME_DNR;
@@ -4490,10 +4491,11 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t 
rae, uint32_t buf_len,
 trans_len = MIN(sizeof(smart) - off, buf_len);
 smart.critical_warning = n->smart_critical_warning;
 
-smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read,
-1000));
-smart.data_units_written[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_written,
-   1000));
+u_read = DIV_ROUND_UP(stats.units_read >> BDRV_SECTOR_BITS, 1000);
+u_written = DIV_ROUND_UP(stats.units_written >> BDRV_SECTOR_BITS, 1000);
+
+smart.data_units_read[0] = cpu_to_le64(u_read);
+smart.data_units_written[0] = cpu_to_le64(u_written);
 smart.host_read_commands[0] = cpu_to_le64(stats.read_commands);
 smart.host_write_commands[0] = cpu_to_le64(stats.write_commands);
 
-- 
2.30.2




Re: [PATCH v3] block/rbd: Add support for layered encryption

2022-11-16 Thread Ilya Dryomov
On Wed, Nov 16, 2022 at 12:15 PM Daniel P. Berrangé  wrote:
>
> On Wed, Nov 16, 2022 at 10:23:52AM +, Daniel P. Berrangé wrote:
> > On Wed, Nov 16, 2022 at 09:03:31AM +, Or Ozeri wrote:
> > > > -Original Message-
> > > > From: Daniel P. Berrangé 
> > > > Sent: 15 November 2022 19:47
> > > > To: Or Ozeri 
> > > > Cc: qemu-devel@nongnu.org; qemu-bl...@nongnu.org; Danny Harnik
> > > > ; idryo...@gmail.com
> > > > Subject: [EXTERNAL] Re: [PATCH v3] block/rbd: Add support for layered
> > > > encryption
> > > >
> > > > AFAICT, supporting layered encryption shouldn't require anything other 
> > > > than
> > > > the 'parent' addition.
> > > >
> > >
> > > Since the layered encryption API is new in librbd, we don't have to
> > > support "luks" and "luks2" at all.
> > > In librbd we are actually deprecating the use of "luks" and "luks2",
> > > and instead ask users to use "luks-any".
> >
> > Deprecating that is a bad idea. The security characteristics and
> > feature set of LUKSv1 and LUKSv2 can be quite different. If a mgmt
> > app is expecting the volume to be protected with LUKSv2, it should
> > be stating that explicitly and not permit a silent downgrade if
> > the volume was unexpectedly using LUKSv1.
> >
> > > If we don't add "luks-any" here, we will need to implement
> > > explicit cases for "luks" and "luks2" in the qemu_rbd_encryption_load2.
> > > This looks like a kind of wasteful coding that won't be actually used
> > > by users of the rbd driver in qemu.
> >
> > It isn't wasteful - supporting the formats explicitly is desirable
> > to prevent format downgrades.
> >
> > > Anyhow, we need the "luks-any" option for our use-case, so if you
> > > insist, I will first submit a patch to add "luks-any", before this
> > > patch.
> >
> > I'm pretty wary of any kind of automatic encryption format detection
> > in QEMU. The automatic block driver format probing has been a long
> > standing source of CVEs in QEMU and every single mgmt app above QEMU.
>
> Having said that, normal linux LUKS tools like cryptsetup or systemd
> LUKS integration will auto-detect  luks1 vs luks2. All cryptsetup
> commands also have an option to explicitly specify the format version.
>
> So with that precedent I guess it is ok to add 'luks-any'.

Yeah, I think we may need to reconsider the intent to deprecate
LUKS1 and LUKS2 options for loading encryption in librbd in favor
of a generic LUKS(-ANY) option.  But, just on its own, LUKS(-ANY)
is definitely a thing and having it exposed in QEMU seems natural.

Thanks,

Ilya



[PATCH] target/arm: Limit LPA2 effective output address when TCR.DS == 0

2022-11-16 Thread Ard Biesheuvel
With LPA2, the effective output address size is at most 48 bits when
TCR.DS == 0. This case is currently unhandled in the page table walker,
where we happily assume LVA/64k granule when outputsize > 48 and
param.ds == 0, resulting in the wrong conversion being used from a
page table descriptor to a physical address.

if (outputsize > 48) {
if (param.ds) {
descaddr |= extract64(descriptor, 8, 2) << 50;
} else {
descaddr |= extract64(descriptor, 12, 4) << 48;
}

So cap the outputsize to 48 when TCR.DS is cleared, as per the
architecture.

Cc: Peter Maydell 
Cc: Philippe Mathieu-Daudé 
Cc: Richard Henderson 
Signed-off-by: Ard Biesheuvel 
---
 target/arm/ptw.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index 3745ac9723474332..9a6277d862fac229 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -1222,6 +1222,14 @@ static bool get_phys_addr_lpae(CPUARMState *env, 
S1Translate *ptw,
 ps = MIN(ps, param.ps);
 assert(ps < ARRAY_SIZE(pamax_map));
 outputsize = pamax_map[ps];
+
+/*
+ * With LPA2, the effective output address (OA) size is at most 48 bits
+ * unless TCR.DS == 1
+ */
+if (!param.ds && param.gran != Gran64K) {
+outputsize = MIN(outputsize, 48);
+}
 } else {
 param = aa32_va_parameters(env, address, mmu_idx);
 level = 1;
-- 
2.35.1




Re: [PATCH v9 00/10] s390x: CPU Topology

2022-11-16 Thread Christian Borntraeger

Am 02.09.22 um 09:55 schrieb Pierre Morel:

Hi,

The implementation of the CPU Topology in QEMU has been drastically
modified since the last patch series and the number of LOCs has been
greatly reduced.

Unnecessary objects have been removed, only a single S390Topology object
is created to support migration and reset.

Documentation has also been added to the series.


To use these patches, you will need Linux V6-rc1 or newer.

Mainline patches needed are:

f5ecfee94493 2022-07-20 KVM: s390: resetting the Topology-Change-Report
24fe0195bc19 2022-07-20 KVM: s390: guest support for topology function
0130337ec45b 2022-07-20 KVM: s390: Cleanup ipte lock access and SIIF fac..

Currently this code is for KVM only; I have no idea if it is interesting
to provide a TCG patch. If it ever is, it will be done in another series.

To have a better understanding of the S390x CPU Topology and its
implementation in QEMU you can have a look at the documentation in the
last patch.

New in this series
==

   s390x/cpus: Make absence of multithreading clear

This patch makes clear that CPU-multithreading is not supported in
the guest.

   s390x/cpu topology: core_id sets s390x CPU topology

This patch uses the core_id to build the container topology
and the placement of the CPU inside the container.

   s390x/cpu topology: reporting the CPU topology to the guest

This patch is based on the fact that the CPU type for guests
is always IFL, CPUs are always dedicated and the polarity is
always horizontal.
This may change in the future.

   hw/core: introducing drawer and books for s390x
   s390x/cpu: reporting drawers and books topology to the guest

These two patches extend the topology handling to add two
new containers levels above sockets: books and drawers.

The subject of the last patches is clear enough (I hope).

Regards,
Pierre

Pierre Morel (10):
   s390x/cpus: Make absence of multithreading clear
   s390x/cpu topology: core_id sets s390x CPU topology
   s390x/cpu topology: reporting the CPU topology to the guest
   hw/core: introducing drawer and books for s390x
   s390x/cpu: reporting drawers and books topology to the guest
   s390x/cpu_topology: resetting the Topology-Change-Report
   s390x/cpu_topology: CPU topology migration
   target/s390x: interception of PTF instruction
   s390x/cpu_topology: activating CPU topology



Do we really need a machine property? As far as I can see, old QEMU
cannot  activate the ctop facility with old and new kernel unless it
enables CAP_S390_CPU_TOPOLOGY. I do get
oldqemu  -cpu z14,ctop=on
qemu-system-s390x: Some features requested in the CPU model are not available 
in the configuration: ctop

With the newer QEMU we can. So maybe we can simply have a topology (and
then a cpu model feature) in new QEMUs and none in old. The cpu model
would then also fence migration from enabled to disabled.



Re: [PATCH v2 2/9] block-copy: add missing coroutine_fn annotations

2022-11-16 Thread Paolo Bonzini

On 11/15/22 16:41, Emanuele Giuseppe Esposito wrote:

To sum up on what was discussed in this series, I don't really see any
strong objection against these patches, so I will soon send v3 which is
pretty much the same except for patch 1, which will be removed.

I think these patches are useful and will be even more meaningful to the
reviewer when in the next few days I send all the rwlock patches.


Yes, I agree.

FWIW I implemented path search in vrc and it found 133 candidates 
(functions that are only called by coroutine_fn are not coroutine_fns 
themselves).  I only list them after the signature because as expected, 
most of them are pointless; however there are some that are obviously correct:


1) some have _co_ in their name :)

2) these five directly call a generated_co_wrapper so they're an easy catch:

vhdx_log_write_and_flush-> bdrv_flush
vhdx_log_write_and_flush-> bdrv_pread
vhdx_log_write_and_flush-> bdrv_pwrite
mirror_flush-> blk_flush
qcow2_check_refcounts   -> bdrv_pwrite
qcow2_check_refcounts   -> bdrv_pwrite_sync
qcow2_check_refcounts   -> bdrv_pread
qcow2_read_extensions   -> bdrv_pread
check_directory_consistency -> bdrv_pwrite

(vrc lets me query this with "paths [coroutine_fn_candidate] 
[no_coroutine_fn]")


3) I can also query (with "paths [coroutine_fn_candidate] ... 
[no_coroutine_fn]") those that end up calling a generated_co_wrapper. 
Among these, vrc catches block_copy_reset_unallocated from this patch:


block_copy_reset_unallocated
block_crypto_co_create_generic
calculate_l2_meta
check_directory_consistency
commit_direntries
commit_one_file
is_zero
mirror_flush
qcow2_alloc_bytes
qcow2_alloc_cluster_abort
qcow2_alloc_clusters_at
qcow2_check_refcounts
qcow2_get_last_cluster
qcow2_read_extensions
qcow2_read_snapshots
qcow2_truncate_bitmaps_check
qcow2_update_options
vhdx_log_write_and_flush
vmdk_is_cid_valid
zero_l2_subclusters

Another possibility is to identify common "entry points" in the paths to 
the no_coroutine_fn and make them generated_co_wrappers.  For example in 
qcow2 these include bitmap_list_load, update_refcount and 
get_cluster_table and the qcow2_snapshot_* functions.


Of course the analysis would have to be rerun after doing every change.

The most time consuming part is labeling coroutine_fn/no_coroutine_fn, 
which would be useful to do with clang (and at this point you might as 
well extract the CFG with it).  Doing the queries totally by hand 
doesn't quite scale (for example vrc's blind spot is inlining and I 
forgot to disable it, but I only noticed too late...), but it should be 
scriptable since after all VRC is just a Python package + a nice CLI.


Thanks,

Paolo



label coroutine_fn_candidate aio_get_thread_pool
label coroutine_fn_candidate aio_task_pool_free
label coroutine_fn_candidate aio_task_pool_status
label coroutine_fn_candidate bdrv_bsc_fill
label coroutine_fn_candidate bdrv_bsc_invalidate_range
label coroutine_fn_candidate bdrv_bsc_is_data
label coroutine_fn_candidate bdrv_can_write_zeroes_with_unmap
label coroutine_fn_candidate bdrv_check_request
label coroutine_fn_candidate bdrv_dirty_bitmap_get
label coroutine_fn_candidate bdrv_dirty_bitmap_get_locked
label coroutine_fn_candidate bdrv_dirty_bitmap_lock
label coroutine_fn_candidate bdrv_dirty_bitmap_next_dirty_area
label coroutine_fn_candidate bdrv_dirty_bitmap_next_zero
label coroutine_fn_candidate bdrv_dirty_bitmap_set_inconsistent
label coroutine_fn_candidate bdrv_dirty_bitmap_status
label coroutine_fn_candidate bdrv_dirty_bitmap_truncate
label coroutine_fn_candidate bdrv_dirty_bitmap_unlock
label coroutine_fn_candidate bdrv_dirty_iter_free
label coroutine_fn_candidate bdrv_dirty_iter_new
label coroutine_fn_candidate bdrv_dirty_iter_next
label coroutine_fn_candidate bdrv_has_readonly_bitmaps
label coroutine_fn_candidate bdrv_inc_in_flight
label coroutine_fn_candidate bdrv_min_mem_align
label coroutine_fn_candidate bdrv_pad_request
label coroutine_fn_candidate bdrv_probe_all
label coroutine_fn_candidate bdrv_reset_dirty_bitmap_locked
label coroutine_fn_candidate bdrv_round_to_clusters
label coroutine_fn_candidate bdrv_set_dirty
label coroutine_fn_candidate bdrv_set_dirty_iter
label coroutine_fn_candidate bdrv_write_threshold_check_write
label coroutine_fn_candidate blk_check_byte_request
label coroutine_fn_candidate blkverify_err
label coroutine_fn_candidate block_copy_async
label coroutine_fn_candidate block_copy_call_cancel
label coroutine_fn_candidate block_copy_call_cancelled
label coroutine_fn_candidate block_copy_call_failed
label coroutine_fn_candidate block_copy_call_finished
label coroutine_fn_candidate block_copy_call_free
label coroutine_fn_candidate block_copy_call_status
label coroutine_fn_candidate block_copy_call_succeeded
label coroutine_fn_candidate block_copy_reset_unallocated
label coroutine_fn_candidate block_copy_set_skip_unallocated
label coroutine_fn_candidate block_crypto_co_create_generic
label coroutine_fn_candidate 

Re: [PATCH RFC 0/2] add SHPC hotplug event

2022-11-16 Thread Vladimir Sementsov-Ogievskiy

On 11/16/22 19:23, Michael S. Tsirkin wrote:

On Wed, Nov 16, 2022 at 07:12:32PM +0300, Vladimir Sementsov-Ogievskiy wrote:

Hi all! Please look at 02 for the details.


In the future, pls use --subject-prefix='PATCH RFC' with git-format-patch to
add same prefix to all patches.
If you are resending, add 'resend' in the subject, or increase the
version #.



OK, yes, will do next time. Sorry for the inconvenience :/

--
Best regards,
Vladimir




Re: [PATCH 2/2] qapi: introduce DEVICE_POWER_ON for SHPC hotplug

2022-11-16 Thread Michael S. Tsirkin
On Wed, Nov 16, 2022 at 07:12:34PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> Hi all! That's an RFC patch.
> 
> The problem is that the SHPC protocol says that the power LED blinks for 5
> seconds before actually turning on the device. If we call device-del
> during this time the attention button press is ignored and we never get
> DEVICE_DELETED event, which is unexpected for the user.
> 
> I suggest adding a counterpart to DEVICE_DELETED: DEVICE_POWER_ON. So the user
> should wait for DEVICE_POWER_ON after device-add before making any
> other operations with the device (including device-del).
> 
> What I'm unsure is what about other types of hotplug - PCIE and
> ACPI.. Do they suffer from similar problems?

I didn't yet look at this patchset deeply (we are in freeze anyway)
but PCIE is substantially the same as SHPC.

Take a look at Gerd's "improve native hotplug for pcie root ports";
the same kind of approach probably works for SHPC.

> Seems not.. Should we send
> this event for them at some moment, or should the user be aware of which
> kind of hotplug is in use to determine to wait for the DEVICE_POWER_ON
> or not to wait.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  hw/pci/shpc.c  | 16 
>  qapi/qdev.json | 23 +++
>  2 files changed, 39 insertions(+)
> 
> diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c
> index ba241e2818..7c53971c1c 100644
> --- a/hw/pci/shpc.c
> +++ b/hw/pci/shpc.c
> @@ -1,5 +1,6 @@
>  #include "qemu/osdep.h"
>  #include "qapi/error.h"
> +#include "qapi/qapi-events-qdev.h"
>  #include "qemu/host-utils.h"
>  #include "qemu/range.h"
>  #include "qemu/error-report.h"
> @@ -273,6 +274,18 @@ static void shpc_free_devices_in_slot(SHPCDevice *shpc, 
> int slot)
>  }
>  }
>  
> +static void shpc_devices_power_on_in_slot(SHPCDevice *shpc, int slot)
> +{
> +int devfn;
> +PCIDevice *dev;
> +
> +FOR_EACH_DEVICE_IN_SLOT(shpc, slot, dev, devfn) {
> +DeviceState *ds = DEVICE(dev);
> +
> +qapi_event_send_device_power_on(!!ds->id, ds->id, 
> ds->canonical_path);
> +}
> +}
> +
>  static void shpc_slot_command(SHPCDevice *shpc, uint8_t target,
>uint8_t state, uint8_t power, uint8_t attn)
>  {
> @@ -291,6 +304,9 @@ static void shpc_slot_command(SHPCDevice *shpc, uint8_t 
> target,
>  switch (power) {
>  case SHPC_LED_NO:
>  break;
> +case SHPC_LED_ON:
> +shpc_devices_power_on_in_slot(shpc, slot);
> +__attribute__ ((fallthrough));
>  default:
>  /* TODO: send event to monitor */
>  shpc_set_status(shpc, slot, power, SHPC_SLOT_PWR_LED_MASK);
> diff --git a/qapi/qdev.json b/qapi/qdev.json
> index 2708fb4e99..360dcf8ba6 100644
> --- a/qapi/qdev.json
> +++ b/qapi/qdev.json
> @@ -158,3 +158,26 @@
>  ##
>  { 'event': 'DEVICE_UNPLUG_GUEST_ERROR',
>'data': { '*device': 'str', 'path': 'str' } }
> +
> +##
> +# @DEVICE_POWER_ON:
> +#
> +# Emitted whenever power is on for the devices plugged into pci slot.
> +# At this point it's safe to remove the device.
> +#
> +# @device: the device's ID if it has one
> +#
> +# @path: the device's QOM path
> +#
> +# Since: 7.2
> +#
> +# Example:
> +#
> +# <- { "event": "DEVICE_POWER_ON",
> +#  "data": { "device": "virtio-disk-0",
> +#"path": "/machine/peripheral/virtio-disk-0" },
> +#  "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
> +#
> +##
> +{ 'event': 'DEVICE_POWER_ON',
> +  'data': { '*device': 'str', 'path': 'str' } }
> -- 
> 2.34.1




Re: [PATCH RFC 0/2] add SHPC hotplug event

2022-11-16 Thread Michael S. Tsirkin
On Wed, Nov 16, 2022 at 07:12:32PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> Hi all! Please look at 02 for the details.


In the future, pls use --subject-prefix='PATCH RFC' with git-format-patch to
add same prefix to all patches.
If you are resending, add 'resend' in the subject, or increase the
version #.

> Vladimir Sementsov-Ogievskiy (2):
>   hw/pci/shpc: introduce FOR_EACH_DEVICE_IN_SLOT
>   qapi: introduce DEVICE_POWER_ON for SHPC hotplug
> 
>  hw/pci/shpc.c  | 53 +-
>  qapi/qdev.json | 23 ++
>  2 files changed, 67 insertions(+), 9 deletions(-)
> 
> -- 
> 2.34.1




Re: [PATCH RFC 0/2] add SHPC hotplug event

2022-11-16 Thread Michael S. Tsirkin
On Wed, Nov 16, 2022 at 07:12:32PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> Hi all! Please look at 02 for the details.


I got 2 copies donnu which to reply to.

> Vladimir Sementsov-Ogievskiy (2):
>   hw/pci/shpc: introduce FOR_EACH_DEVICE_IN_SLOT
>   qapi: introduce DEVICE_POWER_ON for SHPC hotplug
> 
>  hw/pci/shpc.c  | 53 +-
>  qapi/qdev.json | 23 ++
>  2 files changed, 67 insertions(+), 9 deletions(-)
> 
> -- 
> 2.34.1




Re: [PATCH v2 3/3] nvme: Add physical writes/reads from OCP log

2022-11-16 Thread Joel Granados
On Tue, Nov 15, 2022 at 12:26:17PM +0100, Klaus Jensen wrote:
> On Nov 14 14:50, Joel Granados wrote:
> > In order to evaluate write amplification factor (WAF) within the storage
> > stack it is important to know the number of bytes written to the
> > controller. The existing SMART log value of Data Units Written is too
> > coarse (given in units of 500 Kb) and so we add the SMART health
> > information extended from the OCP specification (given in units of bytes).
> > 
> > To accommodate different vendor-specific specifications like OCP, we add a
> > multiplexing function (nvme_vendor_specific_log) which will route to the
> > different log functions based on arguments and log ids. We only return the
> > OCP extended smart log when the command is 0xC0 and ocp has been turned on
> > in the args.
> > 
> > Though we add the whole nvme smart log extended structure, we only populate
> > the physical_media_units_{read,written}, log_page_version and
> > log_page_uuid.
> > 
> > Signed-off-by: Joel Granados 
> > 
> > squash with main
> > 
> > Signed-off-by: Joel Granados 
> 
> Looks like you slightly messed up the squash ;)
oops. that is my bad

> 
> Also, squash the previous patch (adding the ocp parameter) into this.
Here I wanted to keep the introduction of the argument separate. In any
case, I'll squash it with the other one.

> Please add a note in the documentation (docs/system/devices/nvme.rst)
> about this parameter.
Of course. I always forget documentation. I'll add it under the
"Controller Emulation" section and I'll call it ``ocp``

> 
> > ---
> >  hw/nvme/ctrl.c   | 56 
> >  include/block/nvme.h | 36 
> >  2 files changed, 92 insertions(+)
> > 
> > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> > index 220683201a..5e6a8150a2 100644
> > --- a/hw/nvme/ctrl.c
> > +++ b/hw/nvme/ctrl.c
> > @@ -4455,6 +4455,42 @@ static void nvme_set_blk_stats(NvmeNamespace *ns, 
> > struct nvme_stats *stats)
> >  stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE];
> >  }
> >  
> > +static uint16_t nvme_ocp_extended_smart_info(NvmeCtrl *n, uint8_t rae,
> > + uint32_t buf_len, uint64_t 
> > off,
> > + NvmeRequest *req)
> > +{
> > +NvmeNamespace *ns = NULL;
> > +NvmeSmartLogExtended smart_ext = { 0 };
> > +struct nvme_stats stats = { 0 };
> > +uint32_t trans_len;
> > +
> > +if (off >= sizeof(smart_ext)) {
> > +return NVME_INVALID_FIELD | NVME_DNR;
> > +}
> > +
> > +// Accumulate all stats from all namespaces
> 
> Use /* lower-case and no period */ for one sentence, one line comments.
> 
> I think scripts/checkpatch.pl picks this up.
There is a checkpatch like in the kernel. Fantastic! I'll make a note to
use it from now on.


> 
> > +for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
> > +ns = nvme_ns(n, i);
> > +if (ns)
> > +{
> 
> Parentheses go on the same line as the `if`.
of course

> 
> > > +nvme_set_blk_stats(ns, &stats);
> > +}
> > +}
> > +
> > +smart_ext.physical_media_units_written[0] = 
> > cpu_to_le32(stats.units_written);
> > +smart_ext.physical_media_units_read[0] = cpu_to_le32(stats.units_read);
> > +smart_ext.log_page_version = 0x0003;
> > +smart_ext.log_page_uuid[0] = 0xA4F2BFEA2810AFC5;
> > +smart_ext.log_page_uuid[1] = 0xAFD514C97C6F4F9C;
> > +
> > +if (!rae) {
> > +nvme_clear_events(n, NVME_AER_TYPE_SMART);
> > +}
> > +
> > +trans_len = MIN(sizeof(smart_ext) - off, buf_len);
> > > +return nvme_c2h(n, (uint8_t *) &smart_ext + off, trans_len, req);
> > +}
> > +
> >  static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
> >  uint64_t off, NvmeRequest *req)
> >  {
> > @@ -4642,6 +4678,24 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, 
> > uint8_t csi, uint32_t buf_len,
> > >  return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req);
> >  }
> >  
> > +static uint16_t nvme_vendor_specific_log(uint8_t lid, NvmeCtrl *n, uint8_t 
> > rae,
> > + uint32_t buf_len, uint64_t off,
> > + NvmeRequest *req)
> 
> `NvmeCtrl *n` must be first parameter.
Any reason why this is the case? I'll change it in my code, but would be
nice to understand the reason.


> 
> > +{
> > +NvmeSubsystem *subsys = n->subsys;
> > +switch (lid) {
> > +case NVME_LOG_VENDOR_START:
> 
> In this particular case, I think it is more clear if you simply use the
> hex value directly. The "meaning" of the log page id depends on whether or
> not this is a controller implementing the OCP spec.
Agreed

> 
> > +if (subsys->params.ocp) {
> > +return nvme_ocp_extended_smart_info(n, rae, buf_len, off, 
> > req);
> > +}
> > +break;
> > +/* Add a case for each additional vendor 

[PATCH RFC 0/2] add SHPC hotplug event

2022-11-16 Thread Vladimir Sementsov-Ogievskiy
Hi all! Please look at 02 for the details.

Vladimir Sementsov-Ogievskiy (2):
  hw/pci/shpc: introduce FOR_EACH_DEVICE_IN_SLOT
  qapi: introduce DEVICE_POWER_ON for SHPC hotplug

 hw/pci/shpc.c  | 53 +-
 qapi/qdev.json | 23 ++
 2 files changed, 67 insertions(+), 9 deletions(-)

-- 
2.34.1




[PATCH 1/2] hw/pci/shpc: introduce FOR_EACH_DEVICE_IN_SLOT

2022-11-16 Thread Vladimir Sementsov-Ogievskiy
Introduce a macro to loop through devices like in
shpc_free_devices_in_slot(), as we are going to add one more similar
function.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 hw/pci/shpc.c | 37 -
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c
index e71f3a7483..ba241e2818 100644
--- a/hw/pci/shpc.c
+++ b/hw/pci/shpc.c
@@ -236,22 +236,41 @@ static void shpc_invalid_command(SHPCDevice *shpc)
SHPC_CMD_STATUS_INVALID_CMD);
 }
 
-static void shpc_free_devices_in_slot(SHPCDevice *shpc, int slot)
+static PCIDevice *shpc_next_device_in_slot(SHPCDevice *shpc, int slot,
+   int *start_devfn)
 {
-HotplugHandler *hotplug_ctrl;
 int devfn;
 int pci_slot = SHPC_IDX_TO_PCI(slot);
-for (devfn = PCI_DEVFN(pci_slot, 0);
+
+for (devfn = *start_devfn ?: PCI_DEVFN(pci_slot, 0);
  devfn <= PCI_DEVFN(pci_slot, PCI_FUNC_MAX - 1);
  ++devfn) {
-PCIDevice *affected_dev = shpc->sec_bus->devices[devfn];
-if (affected_dev) {
-hotplug_ctrl = qdev_get_hotplug_handler(DEVICE(affected_dev));
-hotplug_handler_unplug(hotplug_ctrl, DEVICE(affected_dev),
-   &error_abort);
-object_unparent(OBJECT(affected_dev));
+PCIDevice *dev = shpc->sec_bus->devices[devfn];
+if (dev) {
+*start_devfn = devfn + 1; /* for next iteration */
+return dev;
 }
 }
+
+return NULL;
+}
+
+#define FOR_EACH_DEVICE_IN_SLOT(shpc, slot, dev, devfn) \
+for ((devfn) = 0, \
+ (dev) = shpc_next_device_in_slot((shpc), (slot), &(devfn)); \
+ (dev); (dev) = shpc_next_device_in_slot((shpc), (slot), &(devfn)))
+
+static void shpc_free_devices_in_slot(SHPCDevice *shpc, int slot)
+{
+HotplugHandler *hotplug_ctrl;
+int devfn;
+PCIDevice *dev;
+
+FOR_EACH_DEVICE_IN_SLOT(shpc, slot, dev, devfn) {
+hotplug_ctrl = qdev_get_hotplug_handler(DEVICE(dev));
+hotplug_handler_unplug(hotplug_ctrl, DEVICE(dev), &error_abort);
+object_unparent(OBJECT(dev));
+}
 }
 
 static void shpc_slot_command(SHPCDevice *shpc, uint8_t target,
-- 
2.34.1




[PATCH 2/2] qapi: introduce DEVICE_POWER_ON for SHPC hotplug

2022-11-16 Thread Vladimir Sementsov-Ogievskiy
Hi all! That's an RFC patch.

The problem is that the SHPC protocol says that the power LED blinks for 5
seconds before the device is actually turned on. If we call device-del
during this time, the attention button press is ignored and we never get
a DEVICE_DELETED event, which is unexpected for the user.

I suggest adding a counterpart to DEVICE_DELETED: DEVICE_POWER_ON. So the user
should wait for DEVICE_POWER_ON after device-add before performing any
other operations with the device (including device-del).

What I'm unsure about is other types of hotplug - PCIE and
ACPI.. Do they suffer from similar problems? Seems not.. Should we send
this event for them at some moment, or should the user be aware of which
kind of hotplug is in use to determine whether to wait for DEVICE_POWER_ON
or not?

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 hw/pci/shpc.c  | 16 
 qapi/qdev.json | 23 +++
 2 files changed, 39 insertions(+)

diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c
index ba241e2818..7c53971c1c 100644
--- a/hw/pci/shpc.c
+++ b/hw/pci/shpc.c
@@ -1,5 +1,6 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
+#include "qapi/qapi-events-qdev.h"
 #include "qemu/host-utils.h"
 #include "qemu/range.h"
 #include "qemu/error-report.h"
@@ -273,6 +274,18 @@ static void shpc_free_devices_in_slot(SHPCDevice *shpc, 
int slot)
 }
 }
 
+static void shpc_devices_power_on_in_slot(SHPCDevice *shpc, int slot)
+{
+int devfn;
+PCIDevice *dev;
+
+FOR_EACH_DEVICE_IN_SLOT(shpc, slot, dev, devfn) {
+DeviceState *ds = DEVICE(dev);
+
+qapi_event_send_device_power_on(!!ds->id, ds->id, ds->canonical_path);
+}
+}
+
 static void shpc_slot_command(SHPCDevice *shpc, uint8_t target,
   uint8_t state, uint8_t power, uint8_t attn)
 {
@@ -291,6 +304,9 @@ static void shpc_slot_command(SHPCDevice *shpc, uint8_t 
target,
 switch (power) {
 case SHPC_LED_NO:
 break;
+case SHPC_LED_ON:
+shpc_devices_power_on_in_slot(shpc, slot);
+__attribute__ ((fallthrough));
 default:
 /* TODO: send event to monitor */
 shpc_set_status(shpc, slot, power, SHPC_SLOT_PWR_LED_MASK);
diff --git a/qapi/qdev.json b/qapi/qdev.json
index 2708fb4e99..360dcf8ba6 100644
--- a/qapi/qdev.json
+++ b/qapi/qdev.json
@@ -158,3 +158,26 @@
 ##
 { 'event': 'DEVICE_UNPLUG_GUEST_ERROR',
   'data': { '*device': 'str', 'path': 'str' } }
+
+##
+# @DEVICE_POWER_ON:
+#
+# Emitted whenever power is on for the devices plugged into pci slot.
+# At this point it's safe to remove the device.
+#
+# @device: the device's ID if it has one
+#
+# @path: the device's QOM path
+#
+# Since: 7.2
+#
+# Example:
+#
+# <- { "event": "DEVICE_POWER_ON",
+#  "data": { "device": "virtio-disk-0",
+#"path": "/machine/peripheral/virtio-disk-0" },
+#  "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+#
+##
+{ 'event': 'DEVICE_POWER_ON',
+  'data': { '*device': 'str', 'path': 'str' } }
-- 
2.34.1




Re: [PATCH maybe-7.2 1/3] hw/i2c: only schedule pending master when bus is idle

2022-11-16 Thread Cédric Le Goater

On 11/16/22 09:43, Klaus Jensen wrote:

From: Klaus Jensen 

It is not given that the current master will release the bus after a
transfer ends. Only schedule a pending master if the bus is idle.

Fixes: 37fa5ca42623 ("hw/i2c: support multiple masters")
Signed-off-by: Klaus Jensen 
---
  hw/i2c/aspeed_i2c.c  |  2 ++
  hw/i2c/core.c| 37 ++---
  include/hw/i2c/i2c.h |  2 ++
  3 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c
index c166fd20fa11..1f071a3811f7 100644
--- a/hw/i2c/aspeed_i2c.c
+++ b/hw/i2c/aspeed_i2c.c
@@ -550,6 +550,8 @@ static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus *bus, 
uint64_t value)
  }
  SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_STOP_CMD, 0);
  aspeed_i2c_set_state(bus, I2CD_IDLE);
+
+i2c_schedule_pending_master(bus->bus);


Shouldn't it be i2c_bus_release() ?

Thanks,

C.



  }
  
  if (aspeed_i2c_bus_pkt_mode_en(bus)) {

diff --git a/hw/i2c/core.c b/hw/i2c/core.c
index d4ba8146bffb..bed594fe599b 100644
--- a/hw/i2c/core.c
+++ b/hw/i2c/core.c
@@ -185,22 +185,39 @@ int i2c_start_transfer(I2CBus *bus, uint8_t address, bool 
is_recv)
  
  void i2c_bus_master(I2CBus *bus, QEMUBH *bh)

  {
+I2CPendingMaster *node = g_new(struct I2CPendingMaster, 1);
+node->bh = bh;
+
+QSIMPLEQ_INSERT_TAIL(&bus->pending_masters, node, entry);
+}
+
+void i2c_schedule_pending_master(I2CBus *bus)
+{
+I2CPendingMaster *node;
+
  if (i2c_bus_busy(bus)) {
-I2CPendingMaster *node = g_new(struct I2CPendingMaster, 1);
-node->bh = bh;
-
-QSIMPLEQ_INSERT_TAIL(&bus->pending_masters, node, entry);
+/* someone is already controlling the bus; wait for it to release it */
+return;
+}
  
+if (QSIMPLEQ_EMPTY(&bus->pending_masters)) {

  return;
  }
  
-bus->bh = bh;

+node = QSIMPLEQ_FIRST(&bus->pending_masters);
+bus->bh = node->bh;
+
+QSIMPLEQ_REMOVE_HEAD(&bus->pending_masters, entry);
+g_free(node);
+
  qemu_bh_schedule(bus->bh);
  }
  
  void i2c_bus_release(I2CBus *bus)

  {
  bus->bh = NULL;
+
+i2c_schedule_pending_master(bus);
  }
  
  int i2c_start_recv(I2CBus *bus, uint8_t address)

@@ -234,16 +251,6 @@ void i2c_end_transfer(I2CBus *bus)
  g_free(node);
  }
  bus->broadcast = false;
-
-if (!QSIMPLEQ_EMPTY(&bus->pending_masters)) {
-I2CPendingMaster *node = QSIMPLEQ_FIRST(&bus->pending_masters);
-bus->bh = node->bh;
-
-QSIMPLEQ_REMOVE_HEAD(&bus->pending_masters, entry);
-g_free(node);
-
-qemu_bh_schedule(bus->bh);
-}
  }
  
  int i2c_send(I2CBus *bus, uint8_t data)

diff --git a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h
index 9b9581d23097..2a3abacd1ba6 100644
--- a/include/hw/i2c/i2c.h
+++ b/include/hw/i2c/i2c.h
@@ -141,6 +141,8 @@ int i2c_start_send(I2CBus *bus, uint8_t address);
   */
  int i2c_start_send_async(I2CBus *bus, uint8_t address);
  
+void i2c_schedule_pending_master(I2CBus *bus);

+
  void i2c_end_transfer(I2CBus *bus);
  void i2c_nack(I2CBus *bus);
  void i2c_ack(I2CBus *bus);





Re: [PATCH 2/2] pci: drop redundant PCIDeviceClass::is_bridge field

2022-11-16 Thread Igor Mammedov
On Wed, 16 Nov 2022 16:35:10 +0100
Philippe Mathieu-Daudé  wrote:

> On 16/11/22 16:27, Igor Mammedov wrote:
> > and use cast to TYPE_PCI_BRIDGE instead.
> > 
> > Signed-off-by: Igor Mammedov 
> > ---
> >   include/hw/pci/pci.h   | 11 +--
> >   include/hw/pci/pci_bridge.h|  1 +
> >   hw/acpi/pcihp.c|  3 +--
> >   hw/i386/acpi-build.c   |  5 ++---
> >   hw/pci-bridge/cxl_downstream.c |  1 -
> >   hw/pci-bridge/cxl_upstream.c   |  1 -
> >   hw/pci-bridge/i82801b11.c  |  1 -
> >   hw/pci-bridge/pci_bridge_dev.c |  1 -
> >   hw/pci-bridge/pcie_pci_bridge.c|  1 -
> >   hw/pci-bridge/pcie_root_port.c |  1 -
> >   hw/pci-bridge/simba.c  |  1 -
> >   hw/pci-bridge/xio3130_downstream.c |  1 -
> >   hw/pci-bridge/xio3130_upstream.c   |  1 -
> >   hw/pci-host/designware.c   |  1 -
> >   hw/pci-host/xilinx-pcie.c  |  1 -
> >   hw/pci/pci.c   | 20 +---
> >   hw/ppc/spapr_pci.c | 15 +--
> >   17 files changed, 19 insertions(+), 47 deletions(-)  
> 
> > @@ -1090,9 +1088,10 @@ static PCIDevice *do_pci_register_device(PCIDevice 
> > *pci_dev,
> >   Error *local_err = NULL;
> >   DeviceState *dev = DEVICE(pci_dev);
> >   PCIBus *bus = pci_get_bus(pci_dev);
> > +bool is_bridge = IS_PCI_BRIDGE(pci_dev);
> >   
> >   /* Only pci bridges can be attached to extra PCI root buses */
> > -if (pci_bus_is_root(bus) && bus->parent_dev && !pc->is_bridge) {
> > +if (pci_bus_is_root(bus) && bus->parent_dev && 
> > !IS_PCI_BRIDGE(pci_dev)) {  
> 
> Can we use the recently assigned 'is_bridge' variable?

yep, that was an intention behind the variable.
I'll fix it up on respin.

> 
> Otherwise:
> 
> Reviewed-by: Philippe Mathieu-Daudé 
> 




Re: [PATCH for-7.2] rtl8139: honor large send MSS value

2022-11-16 Thread Stefan Hajnoczi
I have sent a v2 with a fixed MSS mask constant but haven't tested it.

Thanks,
Stefan



[PATCH for-7.2 v2] rtl8139: honor large send MSS value

2022-11-16 Thread Stefan Hajnoczi
The Large-Send Task Offload Tx Descriptor (9.2.1 Transmit) has a
Large-Send MSS value where the driver specifies the MSS. See the
datasheet here:
http://realtek.info/pdf/rtl8139cp.pdf

The code ignores this value and uses a hardcoded MSS of 1500 bytes
instead. When the MTU is less than 1500 bytes the hardcoded value
results in IP fragmentation and poor performance.

Use the Large-Send MSS value to correctly size Large-Send packets.

Jason Wang  noticed that the Large-Send MSS value
mask was incorrect so it is adjusted to match the datasheet and Linux
8139cp driver.

This issue was discussed in the past here:
https://lore.kernel.org/all/20161114162505.GD26664@stefanha-x1.localdomain/

Reported-by: Russell King - ARM Linux 
Reported-by: Tobias Fiebig 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1312
Signed-off-by: Stefan Hajnoczi 
---
 hw/net/rtl8139.c | 21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

v2:
- Fix MSS mask [Jason]

diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index e6643e3c9d..6c406f39ce 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -77,7 +77,6 @@
 ( ( input ) & ( size - 1 )  )
 
 #define ETHER_TYPE_LEN 2
-#define ETH_MTU 1500
 
 #define VLAN_TCI_LEN 2
 #define VLAN_HLEN (ETHER_TYPE_LEN + VLAN_TCI_LEN)
@@ -1934,8 +1933,9 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
 #define CP_TX_LS (1<<28)
 /* large send packet flag */
 #define CP_TX_LGSEN (1<<27)
-/* large send MSS mask, bits 16...25 */
-#define CP_TC_LGSEN_MSS_MASK ((1 << 12) - 1)
+/* large send MSS mask, bits 16...26 */
+#define CP_TC_LGSEN_MSS_SHIFT 16
+#define CP_TC_LGSEN_MSS_MASK ((1 << 11) - 1)
 
 /* IP checksum offload flag */
 #define CP_TX_IPCS (1<<18)
@@ -2149,10 +2149,11 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
 goto skip_offload;
 }
 
-int large_send_mss = (txdw0 >> 16) & CP_TC_LGSEN_MSS_MASK;
+int large_send_mss = (txdw0 >> CP_TC_LGSEN_MSS_SHIFT) &
+ CP_TC_LGSEN_MSS_MASK;
 
-DPRINTF("+++ C+ mode offloaded task TSO MTU=%d IP data %d "
-"frame data %d specified MSS=%d\n", ETH_MTU,
+DPRINTF("+++ C+ mode offloaded task TSO IP data %d "
+"frame data %d specified MSS=%d\n",
 ip_data_len, saved_size - ETH_HLEN, large_send_mss);
 
 int tcp_send_offset = 0;
@@ -2177,9 +2178,13 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
 goto skip_offload;
 }
 
-/* ETH_MTU = ip header len + tcp header len + payload */
+/* MSS too small? */
+if (tcp_hlen + hlen >= large_send_mss) {
+goto skip_offload;
+}
+
 int tcp_data_len = ip_data_len - tcp_hlen;
-int tcp_chunk_size = ETH_MTU - hlen - tcp_hlen;
+int tcp_chunk_size = large_send_mss - hlen - tcp_hlen;
 
 DPRINTF("+++ C+ mode TSO IP data len %d TCP hlen %d TCP "
 "data len %d TCP chunk size %d\n", ip_data_len,
-- 
2.38.1




RE: [PULL 00/30] Next patches

2022-11-16 Thread Xu, Ling1
Hi, All,
  We really appreciate your time reviewing our patch.
  The second CI failure caused by our patch has been addressed. One simple 
way is moving "#endif" in qemu/tests/bench/xbzrle-bench.c from line 46 to line 
450.
We have submitted patch v7 with this modification. Thanks again for your
time.

Best Regards,
Ling
  

-Original Message-
From: Stefan Hajnoczi  
Sent: Wednesday, November 16, 2022 2:58 AM
To: Juan Quintela ; Xu, Ling1 ; Zhao, 
Zhou ; Jin, Jun I 
Cc: qemu-devel@nongnu.org; Michael Tokarev ; Marc-André Lureau 
; David Hildenbrand ; Laurent 
Vivier ; Paolo Bonzini ; Daniel P. 
Berrangé ; Peter Xu ; Stefan Hajnoczi 
; Dr. David Alan Gilbert ; Thomas 
Huth ; qemu-bl...@nongnu.org; qemu-triv...@nongnu.org; 
Philippe Mathieu-Daudé ; Fam Zheng 
Subject: Re: [PULL 00/30] Next patches

On Tue, 15 Nov 2022 at 10:40, Juan Quintela  wrote:
>
> The following changes since commit 98f10f0e2613ba1ac2ad3f57a5174014f6dcb03d:
>
>   Merge tag 'pull-target-arm-20221114' of 
> https://git.linaro.org/people/pmaydell/qemu-arm into staging 
> (2022-11-14 13:31:17 -0500)
>
> are available in the Git repository at:
>
>   https://gitlab.com/juan.quintela/qemu.git tags/next-pull-request
>
> for you to fetch changes up to d896a7a40db13fc2d05828c94ddda2747530089c:
>
>   migration: Block migration comment or code is wrong (2022-11-15 
> 10:31:06 +0100)
>
> 
> Migration PULL request (take 2)
>
> Hi
>
> This time properly signed.
>
> [take 1]
> It includes:
> - Leonardo fix for zero_copy flush
> - Fiona fix for return value of readv/writev
> - Peter Xu cleanups
> - Peter Xu preempt patches
> - Patches ready from zero page (me)
> - AVX2 support (ling)
> - fix for slow networking and reordering of first packets (manish)
>
> Please, apply.
>
> 
>
> Fiona Ebner (1):
>   migration/channel-block: fix return value for
> qio_channel_block_{readv,writev}
>
> Juan Quintela (5):
>   multifd: Create page_size fields into both MultiFD{Recv,Send}Params
>   multifd: Create page_count fields into both MultiFD{Recv,Send}Params
>   migration: Export ram_transferred_ram()
>   migration: Export ram_release_page()
>   migration: Block migration comment or code is wrong
>
> Leonardo Bras (1):
>   migration/multifd/zero-copy: Create helper function for flushing
>
> Peter Xu (20):
>   migration: Fix possible infinite loop of ram save process
>   migration: Fix race on qemu_file_shutdown()
>   migration: Disallow postcopy preempt to be used with compress
>   migration: Use non-atomic ops for clear log bitmap
>   migration: Disable multifd explicitly with compression
>   migration: Take bitmap mutex when completing ram migration
>   migration: Add postcopy_preempt_active()
>   migration: Cleanup xbzrle zero page cache update logic
>   migration: Trivial cleanup save_page_header() on same block check
>   migration: Remove RAMState.f references in compression code
>   migration: Yield bitmap_mutex properly when sending/sleeping
>   migration: Use atomic ops properly for page accountings
>   migration: Teach PSS about host page
>   migration: Introduce pss_channel
>   migration: Add pss_init()
>   migration: Make PageSearchStatus part of RAMState
>   migration: Move last_sent_block into PageSearchStatus
>   migration: Send requested page directly in rp-return thread
>   migration: Remove old preempt code around state maintainance
>   migration: Drop rs->f
>
> ling xu (2):
>   Update AVX512 support for xbzrle_encode_buffer
>   Unit test code and benchmark code

This commit causes the following CI failure:

cc -m64 -mcx16 -Ilibauthz.fa.p -I. -I.. -Iqapi -Itrace -Iui/shader
-I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include
-fdiagnostics-color=auto -Wall -Winvalid-pch -Werror -std=gnu11 -O2 -g -isystem 
/builds/qemu-project/qemu/linux-headers -isystem linux-headers -iquote . 
-iquote /builds/qemu-project/qemu -iquote /builds/qemu-project/qemu/include 
-iquote
/builds/qemu-project/qemu/tcg/i386 -pthread -U_FORTIFY_SOURCE
-D_FORTIFY_SOURCE=2 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE 
-Wstrict-prototypes -Wredundant-decls -Wundef -Wwrite-strings 
-Wmissing-prototypes -fno-strict-aliasing -fno-common -fwrapv 
-Wold-style-declaration -Wold-style-definition -Wtype-limits -Wformat-security 
-Wformat-y2k -Winit-self -Wignored-qualifiers -Wempty-body -Wnested-externs 
-Wendif-labels -Wexpansion-to-defined
-Wimplicit-fallthrough=2 -Wno-missing-include-dirs -Wno-shift-negative-value 
-Wno-psabi -fstack-protector-strong -fPIE -MD -MQ 
libauthz.fa.p/authz_simple.c.o -MF libauthz.fa.p/authz_simple.c.o.d -o 
libauthz.fa.p/authz_simple.c.o -c ../authz/simple.c In file included from 
../authz/simple.c:23:
../authz/trace.h:1:10: fatal error: trace/trace-authz.h: No such file or 
directory
1 | #include "trace/trace-authz.h"
| ^


Re: [PATCH 2/2] pci: drop redundant PCIDeviceClass::is_bridge field

2022-11-16 Thread Philippe Mathieu-Daudé

On 16/11/22 16:27, Igor Mammedov wrote:

and use cast to TYPE_PCI_BRIDGE instead.

Signed-off-by: Igor Mammedov 
---
  include/hw/pci/pci.h   | 11 +--
  include/hw/pci/pci_bridge.h|  1 +
  hw/acpi/pcihp.c|  3 +--
  hw/i386/acpi-build.c   |  5 ++---
  hw/pci-bridge/cxl_downstream.c |  1 -
  hw/pci-bridge/cxl_upstream.c   |  1 -
  hw/pci-bridge/i82801b11.c  |  1 -
  hw/pci-bridge/pci_bridge_dev.c |  1 -
  hw/pci-bridge/pcie_pci_bridge.c|  1 -
  hw/pci-bridge/pcie_root_port.c |  1 -
  hw/pci-bridge/simba.c  |  1 -
  hw/pci-bridge/xio3130_downstream.c |  1 -
  hw/pci-bridge/xio3130_upstream.c   |  1 -
  hw/pci-host/designware.c   |  1 -
  hw/pci-host/xilinx-pcie.c  |  1 -
  hw/pci/pci.c   | 20 +---
  hw/ppc/spapr_pci.c | 15 +--
  17 files changed, 19 insertions(+), 47 deletions(-)



@@ -1090,9 +1088,10 @@ static PCIDevice *do_pci_register_device(PCIDevice 
*pci_dev,
  Error *local_err = NULL;
  DeviceState *dev = DEVICE(pci_dev);
  PCIBus *bus = pci_get_bus(pci_dev);
+bool is_bridge = IS_PCI_BRIDGE(pci_dev);
  
  /* Only pci bridges can be attached to extra PCI root buses */

-if (pci_bus_is_root(bus) && bus->parent_dev && !pc->is_bridge) {
+if (pci_bus_is_root(bus) && bus->parent_dev && !IS_PCI_BRIDGE(pci_dev)) {


Can we use the recently assigned 'is_bridge' variable?

Otherwise:

Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH] target/ppc: Fix build warnings when building with 'disable-tcg'

2022-11-16 Thread Philippe Mathieu-Daudé

On 16/11/22 16:20, Greg Kurz wrote:

Hi Vaibhav,

Nice to see some people are still building QEMU at IBM ;-)

On Wed, 16 Nov 2022 18:47:43 +0530
Vaibhav Jain  wrote:


Kowshik reported that building qemu with GCC 12.2.1 for 'ppc64-softmmu'
target is failing due to following build warnings:


  ../target/ppc/cpu_init.c:7018:13: error: 'ppc_restore_state_to_opc' defined 
but not used [-Werror=unused-function]
  7018 | static void ppc_restore_state_to_opc(CPUState *cs,


Fix this by wrapping these function definitions in 'ifdef CONFIG_TCG' so that
they are only defined if qemu is compiled with '--enable-tcg'


Interestingly this config isn't covered in 
.gitlab-ci.d/custom-runners/ubuntu-20.04-s390x.yml.





[PATCH v7 0/2] Update AVX512 support for xbzrle and CI failure

2022-11-16 Thread ling xu
This patch updates code of avx512 support for xbzrle_encode_buffer function. 
We mainly modified code in xbzrle-bench.c for addressing CI failure.

Signed-off-by: ling xu 
Co-authored-by: Zhou Zhao 
Co-authored-by: Jun Jin 

ling xu (2):
  Update AVX512 support for xbzrle_encode_buffer
  Unit test code and benchmark code

 meson.build|  16 ++
 meson_options.txt  |   2 +
 migration/ram.c|  34 ++-
 migration/xbzrle.c | 124 ++
 migration/xbzrle.h |   4 +
 tests/bench/meson.build|   4 +
 tests/bench/xbzrle-bench.c | 469 +
 tests/unit/test-xbzrle.c   |  39 ++-
 8 files changed, 684 insertions(+), 8 deletions(-)
 create mode 100644 tests/bench/xbzrle-bench.c

-- 
2.25.1




[PATCH v7 2/2] Update bench-code for addressing CI problem

2022-11-16 Thread ling xu
Unit test code is in test-xbzrle.c, and benchmark code is in xbzrle-bench.c
for performance benchmarking. We have modified xbzrle-bench.c to address
CI problem.

Signed-off-by: ling xu 
Co-authored-by: Zhou Zhao 
Co-authored-by: Jun Jin 
---
 tests/bench/meson.build|   4 +
 tests/bench/xbzrle-bench.c | 469 +
 tests/unit/test-xbzrle.c   |  39 ++-
 3 files changed, 507 insertions(+), 5 deletions(-)
 create mode 100644 tests/bench/xbzrle-bench.c

diff --git a/tests/bench/meson.build b/tests/bench/meson.build
index 279a8fcc33..daefead58d 100644
--- a/tests/bench/meson.build
+++ b/tests/bench/meson.build
@@ -3,6 +3,10 @@ qht_bench = executable('qht-bench',
sources: 'qht-bench.c',
dependencies: [qemuutil])
 
+xbzrle_bench = executable('xbzrle-bench',
+   sources: 'xbzrle-bench.c',
+   dependencies: [qemuutil,migration])
+
 executable('atomic_add-bench',
sources: files('atomic_add-bench.c'),
dependencies: [qemuutil],
diff --git a/tests/bench/xbzrle-bench.c b/tests/bench/xbzrle-bench.c
new file mode 100644
index 00..8848a3a32d
--- /dev/null
+++ b/tests/bench/xbzrle-bench.c
@@ -0,0 +1,469 @@
+/*
+ * Xor Based Zero Run Length Encoding unit tests.
+ *
+ * Copyright 2013 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *  Orit Wasserman  
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "../migration/xbzrle.h"
+
+#if defined(CONFIG_AVX512BW_OPT)
+#define XBZRLE_PAGE_SIZE 4096
+static bool is_cpu_support_avx512bw;
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+unsigned max = __get_cpuid_max(0, NULL);
+int a, b, c, d;
+is_cpu_support_avx512bw = false;
+if (max >= 1) {
+__cpuid(1, a, b, c, d);
+ /* We must check that AVX is not just available, but usable.  */
+if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+int bv;
+__asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+__cpuid_count(7, 0, a, b, c, d);
+   /* 0xe6:
+*  XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+*and ZMM16-ZMM31 state are enabled by OS)
+*  XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+*/
+if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+is_cpu_support_avx512bw = true;
+}
+}
+}
+return ;
+}
+
+struct ResTime {
+float t_raw;
+float t_512;
+};
+
+
+/* Function prototypes
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+uint8_t *dst, int dlen);
+*/
+static void encode_decode_zero(struct ResTime *res)
+{
+uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+int i = 0;
+int dlen = 0, dlen512 = 0;
+int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+for (i = diff_len; i > 0; i--) {
+buffer[1000 + i] = i;
+buffer512[1000 + i] = i;
+}
+
+buffer[1000 + diff_len + 3] = 103;
+buffer[1000 + diff_len + 5] = 105;
+
+buffer512[1000 + diff_len + 3] = 103;
+buffer512[1000 + diff_len + 5] = 105;
+
+/* encode zero page */
+time_t t_start, t_end, t_start512, t_end512;
+t_start = clock();
+dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
+   XBZRLE_PAGE_SIZE);
+t_end = clock();
+float time_val = difftime(t_end, t_start);
+g_assert(dlen == 0);
+
+t_start512 = clock();
+dlen512 = xbzrle_encode_buffer_avx512(buffer512, buffer512, 
XBZRLE_PAGE_SIZE,
+   compressed512, XBZRLE_PAGE_SIZE);
+t_end512 = clock();
+float time_val512 = difftime(t_end512, t_start512);
+g_assert(dlen512 == 0);
+
+res->t_raw = time_val;
+res->t_512 = time_val512;
+
+g_free(buffer);
+g_free(compressed);
+g_free(buffer512);
+g_free(compressed512);
+
+}
+
+static void test_encode_decode_zero_avx512(void)
+{
+int i;
+float time_raw = 0.0, time_512 = 0.0;
+struct ResTime res;
+for (i = 0; i < 1; i++) {
+encode_decode_zero(&res);
+time_raw += res.t_raw;
+time_512 += res.t_512;
+}
+printf("Zero test:\n");
+printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_unchanged(struct ResTime *res)
+{
+uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+uint8_t 

[PATCH v7 1/2] AVX512 support for xbzrle_encode_buffer

2022-11-16 Thread ling xu
This commit is the same as [PATCH v6 1/2]. It provides AVX512 support for the
xbzrle_encode_buffer function to accelerate XBZRLE encoding. A runtime check
for AVX512 support and a benchmark for this feature are added. Compared with
the C version of the xbzrle_encode_buffer function, the AVX512 version achieves
a 50%-70% performance improvement in benchmarks. In addition, if dirty
data is randomly located in a 4K page, the AVX512 version can achieve
almost 140% performance gain.

Signed-off-by: ling xu 
Co-authored-by: Zhou Zhao 
Co-authored-by: Jun Jin 
---
 meson.build|  16 ++
 meson_options.txt  |   2 +
 migration/ram.c|  34 +++--
 migration/xbzrle.c | 124 +
 migration/xbzrle.h |   4 ++
 5 files changed, 177 insertions(+), 3 deletions(-)

diff --git a/meson.build b/meson.build
index cf3e517e56..d0d28f5c9e 100644
--- a/meson.build
+++ b/meson.build
@@ -2344,6 +2344,22 @@ config_host_data.set('CONFIG_AVX512F_OPT', 
get_option('avx512f') \
 int main(int argc, char *argv[]) { return bar(argv[argc - 1]); }
   '''), error_message: 'AVX512F not available').allowed())
 
+config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \
+  .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable 
AVX512BW') \
+  .require(cc.links('''
+#pragma GCC push_options
+#pragma GCC target("avx512bw")
+#include <cpuid.h>
+#include <immintrin.h>
+static int bar(void *a) {
+
+  __m512i *x = a;
+  __m512i res= _mm512_abs_epi8(*x);
+  return res[1];
+}
+int main(int argc, char *argv[]) { return bar(argv[0]); }
+  '''), error_message: 'AVX512BW not available').allowed())
+
 have_pvrdma = get_option('pvrdma') \
   .require(rdma.found(), error_message: 'PVRDMA requires OpenFabrics 
libraries') \
   .require(cc.compiles(gnu_source_prefix + '''
diff --git a/meson_options.txt b/meson_options.txt
index 66128178bf..96814dd211 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -104,6 +104,8 @@ option('avx2', type: 'feature', value: 'auto',
description: 'AVX2 optimizations')
 option('avx512f', type: 'feature', value: 'disabled',
description: 'AVX512F optimizations')
+option('avx512bw', type: 'feature', value: 'auto',
+   description: 'AVX512BW optimizations')
 option('keyring', type: 'feature', value: 'auto',
description: 'Linux keyring support')
 
diff --git a/migration/ram.c b/migration/ram.c
index dc1de9ddbc..ff4c15c9c3 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -83,6 +83,34 @@
 /* 0x80 is reserved in migration.h start with 0x100 next */
 #define RAM_SAVE_FLAG_COMPRESS_PAGE0x100
 
+int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
+ uint8_t *, int) = xbzrle_encode_buffer;
+#if defined(CONFIG_AVX512BW_OPT)
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+unsigned max = __get_cpuid_max(0, NULL);
+int a, b, c, d;
+if (max >= 1) {
+__cpuid(1, a, b, c, d);
+ /* We must check that AVX is not just available, but usable.  */
+if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+int bv;
+__asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+__cpuid_count(7, 0, a, b, c, d);
+   /* 0xe6:
+*  XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+*and ZMM16-ZMM31 state are enabled by OS)
+*  XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+*/
+if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
+}
+}
+}
+}
+#endif
+
 XBZRLECacheStats xbzrle_counters;
 
 /* struct contains XBZRLE cache and a static page
@@ -802,9 +830,9 @@ static int save_xbzrle_page(RAMState *rs, uint8_t 
**current_data,
 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
 
 /* XBZRLE encoding (if there is no overflow) */
-encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
-   TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
-   TARGET_PAGE_SIZE);
+encoded_len = xbzrle_encode_buffer_func(prev_cached_page, 
XBZRLE.current_buf,
+TARGET_PAGE_SIZE, 
XBZRLE.encoded_buf,
+TARGET_PAGE_SIZE);
 
 /*
  * Update the cache contents, so that it corresponds to the data
diff --git a/migration/xbzrle.c b/migration/xbzrle.c
index 1ba482ded9..05366e86c0 100644
--- a/migration/xbzrle.c
+++ b/migration/xbzrle.c
@@ -174,3 +174,127 @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t 
*dst, int dlen)
 
 return d;
 }
+
+#if defined(CONFIG_AVX512BW_OPT)
+#pragma GCC push_options
+#pragma GCC target("avx512bw")
+#include <immintrin.h>
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+ 

[PATCH 1/2] remove DEC 21154 PCI bridge

2022-11-16 Thread Igor Mammedov
The code has not been used in practice since its inception (2004)
  f2aa58c6f4a20 UniNorth PCI bridge support
or maybe even earlier, but it has been consuming contributors' time
as QEMU was being rewritten.
Drop it for now. Whoever would like to actually
use the thing can make sure it actually works and reintroduce
it when there is a user.

PS:
I've stumbled upon this when replacing PCIDeviceClass::is_bridge
field with QOM cast to PCI_BRIDGE type. Unused DEC 21154
was the only one trying to use the field with plain PCIDevice.
It's not worth keeping the field around for the sake of the code
that was commented out 'forever'.

Signed-off-by: Igor Mammedov 
---
 hw/pci-bridge/dec.h   |   9 ---
 include/hw/pci/pci_ids.h  |   1 -
 hw/pci-bridge/dec.c   | 164 --
 hw/pci-bridge/meson.build |   2 -
 hw/pci-host/uninorth.c|   6 --
 5 files changed, 182 deletions(-)
 delete mode 100644 hw/pci-bridge/dec.h
 delete mode 100644 hw/pci-bridge/dec.c

diff --git a/hw/pci-bridge/dec.h b/hw/pci-bridge/dec.h
deleted file mode 100644
index 869e90b136..00
--- a/hw/pci-bridge/dec.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef HW_PCI_BRIDGE_DEC_H
-#define HW_PCI_BRIDGE_DEC_H
-
-
-#define TYPE_DEC_21154 "dec-21154-sysbus"
-
-PCIBus *pci_dec_21154_init(PCIBus *parent_bus, int devfn);
-
-#endif
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index bc9f834fd1..e4386ebb20 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -169,7 +169,6 @@
 
 #define PCI_VENDOR_ID_DEC0x1011
 #define PCI_DEVICE_ID_DEC_21143  0x0019
-#define PCI_DEVICE_ID_DEC_21154  0x0026
 
 #define PCI_VENDOR_ID_CIRRUS 0x1013
 
diff --git a/hw/pci-bridge/dec.c b/hw/pci-bridge/dec.c
deleted file mode 100644
index 4773d07e6d..00
--- a/hw/pci-bridge/dec.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * QEMU DEC 21154 PCI bridge
- *
- * Copyright (c) 2006-2007 Fabrice Bellard
- * Copyright (c) 2007 Jocelyn Mayer
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to 
deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "dec.h"
-#include "hw/sysbus.h"
-#include "qapi/error.h"
-#include "qemu/module.h"
-#include "hw/pci/pci.h"
-#include "hw/pci/pci_host.h"
-#include "hw/pci/pci_bridge.h"
-#include "hw/pci/pci_bus.h"
-#include "qom/object.h"
-
-OBJECT_DECLARE_SIMPLE_TYPE(DECState, DEC_21154)
-
-struct DECState {
-PCIHostState parent_obj;
-};
-
-static int dec_map_irq(PCIDevice *pci_dev, int irq_num)
-{
-return irq_num;
-}
-
-static void dec_pci_bridge_realize(PCIDevice *pci_dev, Error **errp)
-{
-pci_bridge_initfn(pci_dev, TYPE_PCI_BUS);
-}
-
-static void dec_21154_pci_bridge_class_init(ObjectClass *klass, void *data)
-{
-DeviceClass *dc = DEVICE_CLASS(klass);
-PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
-
-set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
-k->realize = dec_pci_bridge_realize;
-k->exit = pci_bridge_exitfn;
-k->vendor_id = PCI_VENDOR_ID_DEC;
-k->device_id = PCI_DEVICE_ID_DEC_21154;
-k->config_write = pci_bridge_write_config;
-k->is_bridge = true;
-dc->desc = "DEC 21154 PCI-PCI bridge";
-dc->reset = pci_bridge_reset;
-dc->vmsd = &vmstate_pci_device;
-}
-
-static const TypeInfo dec_21154_pci_bridge_info = {
-.name  = "dec-21154-p2p-bridge",
-.parent= TYPE_PCI_BRIDGE,
-.instance_size = sizeof(PCIBridge),
-.class_init= dec_21154_pci_bridge_class_init,
-.interfaces = (InterfaceInfo[]) {
-{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
-{ },
-},
-};
-
-PCIBus *pci_dec_21154_init(PCIBus *parent_bus, int devfn)
-{
-PCIDevice *dev;
-PCIBridge *br;
-
-dev = pci_new_multifunction(devfn, false, "dec-21154-p2p-bridge");
-br = PCI_BRIDGE(dev);
-pci_bridge_map_irq(br, "DEC 21154 PCI-PCI bridge", dec_map_irq);
-pci_realize_and_unref(dev, parent_bus, &error_fatal);
-return pci_bridge_get_sec_bus(br);
-}
-
-static void 

[PATCH 2/2] pci: drop redundant PCIDeviceClass::is_bridge field

2022-11-16 Thread Igor Mammedov
and use cast to TYPE_PCI_BRIDGE instead.

Signed-off-by: Igor Mammedov 
---
 include/hw/pci/pci.h   | 11 +--
 include/hw/pci/pci_bridge.h|  1 +
 hw/acpi/pcihp.c|  3 +--
 hw/i386/acpi-build.c   |  5 ++---
 hw/pci-bridge/cxl_downstream.c |  1 -
 hw/pci-bridge/cxl_upstream.c   |  1 -
 hw/pci-bridge/i82801b11.c  |  1 -
 hw/pci-bridge/pci_bridge_dev.c |  1 -
 hw/pci-bridge/pcie_pci_bridge.c|  1 -
 hw/pci-bridge/pcie_root_port.c |  1 -
 hw/pci-bridge/simba.c  |  1 -
 hw/pci-bridge/xio3130_downstream.c |  1 -
 hw/pci-bridge/xio3130_upstream.c   |  1 -
 hw/pci-host/designware.c   |  1 -
 hw/pci-host/xilinx-pcie.c  |  1 -
 hw/pci/pci.c   | 20 +---
 hw/ppc/spapr_pci.c | 15 +--
 17 files changed, 19 insertions(+), 47 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 6ccaaf5154..8b3a8571bf 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -250,16 +250,7 @@ struct PCIDeviceClass {
 uint16_t class_id;
 uint16_t subsystem_vendor_id;   /* only for header type = 0 */
 uint16_t subsystem_id;  /* only for header type = 0 */
-
-/*
- * pci-to-pci bridge or normal device.
- * This doesn't mean pci host switch.
- * When card bus bridge is supported, this would be enhanced.
- */
-bool is_bridge;
-
-/* rom bar */
-const char *romfile;
+const char *romfile;/* rom bar */
 };
 
 typedef void (*PCIINTxRoutingNotifier)(PCIDevice *dev);
diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
index ba4bafac7c..ca6caf487e 100644
--- a/include/hw/pci/pci_bridge.h
+++ b/include/hw/pci/pci_bridge.h
@@ -53,6 +53,7 @@ struct PCIBridgeWindows {
 
 #define TYPE_PCI_BRIDGE "base-pci-bridge"
 OBJECT_DECLARE_SIMPLE_TYPE(PCIBridge, PCI_BRIDGE)
+#define IS_PCI_BRIDGE(dev) object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)
 
 struct PCIBridge {
 /*< private >*/
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index 84d75e6b84..99a898d9ae 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -186,7 +186,6 @@ static PCIBus *acpi_pcihp_find_hotplug_bus(AcpiPciHpState 
*s, int bsel)
 
 static bool acpi_pcihp_pc_no_hotplug(AcpiPciHpState *s, PCIDevice *dev)
 {
-PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
 DeviceClass *dc = DEVICE_GET_CLASS(dev);
 /*
  * ACPI doesn't allow hotplug of bridge devices.  Don't allow
@@ -196,7 +195,7 @@ static bool acpi_pcihp_pc_no_hotplug(AcpiPciHpState *s, 
PCIDevice *dev)
  * Don't allow hot-unplug of SR-IOV Virtual Functions, as they
  * will be removed implicitly, when Physical Function is unplugged.
  */
-return (pc->is_bridge && !dev->qdev.hotplugged) || !dc->hotpluggable ||
+return (IS_PCI_BRIDGE(dev) && !dev->qdev.hotplugged) || !dc->hotpluggable 
||
pci_is_vf(dev);
 }
 
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index d9eaa5fc4d..aa15b11cde 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -403,7 +403,6 @@ static void build_append_pci_bus_devices(Aml *parent_scope, 
PCIBus *bus,
 
 for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
 DeviceClass *dc;
-PCIDeviceClass *pc;
 PCIDevice *pdev = bus->devices[devfn];
 int slot = PCI_SLOT(devfn);
 int func = PCI_FUNC(devfn);
@@ -414,14 +413,14 @@ static void build_append_pci_bus_devices(Aml 
*parent_scope, PCIBus *bus,
 bool cold_plugged_bridge = false;
 
 if (pdev) {
-pc = PCI_DEVICE_GET_CLASS(pdev);
 dc = DEVICE_GET_CLASS(pdev);
 
 /*
  * Cold plugged bridges aren't themselves hot-pluggable.
  * Hotplugged bridges *are* hot-pluggable.
  */
-cold_plugged_bridge = pc->is_bridge && !DEVICE(pdev)->hotplugged;
+cold_plugged_bridge = IS_PCI_BRIDGE(pdev) &&
+  !DEVICE(pdev)->hotplugged;
 bridge_in_acpi =  cold_plugged_bridge && pcihp_bridge_en;
 
 hotpluggbale_slot = bsel && dc->hotpluggable &&
diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
index a361e519d0..3d4e6b59cd 100644
--- a/hw/pci-bridge/cxl_downstream.c
+++ b/hw/pci-bridge/cxl_downstream.c
@@ -217,7 +217,6 @@ static void cxl_dsp_class_init(ObjectClass *oc, void *data)
 DeviceClass *dc = DEVICE_CLASS(oc);
 PCIDeviceClass *k = PCI_DEVICE_CLASS(oc);
 
-k->is_bridge = true;
 k->config_write = cxl_dsp_config_write;
 k->realize = cxl_dsp_realize;
 k->exit = cxl_dsp_exitfn;
diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
index 9b8b57df9d..9df436cb73 100644
--- a/hw/pci-bridge/cxl_upstream.c
+++ b/hw/pci-bridge/cxl_upstream.c
@@ -375,7 +375,6 @@ static void cxl_upstream_class_init(ObjectClass *oc, void 
*data)
 DeviceClass *dc = 

Re: [PATCH-for-7.2] target/ppc: Fix build warnings when building with 'disable-tcg'

2022-11-16 Thread Philippe Mathieu-Daudé

On 16/11/22 14:17, Vaibhav Jain wrote:

Kowshik reported that building qemu with GCC 12.2.1 for 'ppc64-softmmu'
target is failing due to following build warnings:


  ../target/ppc/cpu_init.c:7018:13: error: 'ppc_restore_state_to_opc' defined 
but not used [-Werror=unused-function]
  7018 | static void ppc_restore_state_to_opc(CPUState *cs,


Fix this by wrapping these function definitions in 'ifdef CONFIG_TCG' so that
they are only defined if qemu is compiled with '--enable-tcg'

Reported-by: Kowshik Jois B S 
Signed-off-by: Vaibhav Jain 
---
  target/ppc/cpu_init.c| 2 ++
  target/ppc/excp_helper.c | 2 ++
  2 files changed, 4 insertions(+)

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 32e94153d1..cbf0081374 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7015,6 +7015,7 @@ static vaddr ppc_cpu_get_pc(CPUState *cs)
  return cpu->env.nip;
  }
  
+#ifdef CONFIG_TCG

  static void ppc_restore_state_to_opc(CPUState *cs,
   const TranslationBlock *tb,
   const uint64_t *data)
@@ -7023,6 +7024,7 @@ static void ppc_restore_state_to_opc(CPUState *cs,
  
  cpu->env.nip = data[0];

  }
+#endif /* CONFIG_TCG */


Oops sorry.

Fixes: 61bd1d2942 ("target/ppc: Convert to tcg_ops restore_state_to_opc")


diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index a05a2ed595..94adcb766b 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -2842,6 +2842,7 @@ void helper_td(CPUPPCState *env, target_ulong arg1, 
target_ulong arg2,
  #endif
  #endif
  
+#ifdef CONFIG_TCG

  static uint32_t helper_SIMON_LIKE_32_64(uint32_t x, uint64_t key, uint32_t 
lane)
  {
  const uint16_t c = 0xfffc;
@@ -2924,6 +2925,7 @@ HELPER_HASH(HASHST, env->spr[SPR_HASHKEYR], true)
  HELPER_HASH(HASHCHK, env->spr[SPR_HASHKEYR], false)
  HELPER_HASH(HASHSTP, env->spr[SPR_HASHPKEYR], true)
  HELPER_HASH(HASHCHKP, env->spr[SPR_HASHPKEYR], false)
+#endif /* CONFIG_TCG */


Fixes: 670f1da374 ("target/ppc: Implement hashst and hashchk")

Hmm this is another fix... You could split your patch in 2,
but not a big deal. Regardless:

Reviewed-by: Philippe Mathieu-Daudé 




[PATCH 0/2] remove redundant field PCIDeviceClass::is_bridge

2022-11-16 Thread Igor Mammedov


Igor Mammedov (2):
  remove DEC 21154 PCI bridge
  pci: drop redundant PCIDeviceClass::is_bridge field

 hw/pci-bridge/dec.h|   9 --
 include/hw/pci/pci.h   |  11 +-
 include/hw/pci/pci_bridge.h|   1 +
 include/hw/pci/pci_ids.h   |   1 -
 hw/acpi/pcihp.c|   3 +-
 hw/i386/acpi-build.c   |   5 +-
 hw/pci-bridge/cxl_downstream.c |   1 -
 hw/pci-bridge/cxl_upstream.c   |   1 -
 hw/pci-bridge/dec.c| 164 -
 hw/pci-bridge/i82801b11.c  |   1 -
 hw/pci-bridge/meson.build  |   2 -
 hw/pci-bridge/pci_bridge_dev.c |   1 -
 hw/pci-bridge/pcie_pci_bridge.c|   1 -
 hw/pci-bridge/pcie_root_port.c |   1 -
 hw/pci-bridge/simba.c  |   1 -
 hw/pci-bridge/xio3130_downstream.c |   1 -
 hw/pci-bridge/xio3130_upstream.c   |   1 -
 hw/pci-host/designware.c   |   1 -
 hw/pci-host/uninorth.c |   6 --
 hw/pci-host/xilinx-pcie.c  |   1 -
 hw/pci/pci.c   |  20 ++--
 hw/ppc/spapr_pci.c |  15 +--
 22 files changed, 19 insertions(+), 229 deletions(-)
 delete mode 100644 hw/pci-bridge/dec.h
 delete mode 100644 hw/pci-bridge/dec.c

-- 
2.31.1




Re: [PATCH] target/ppc: Fix build warnings when building with 'disable-tcg'

2022-11-16 Thread Greg Kurz
Hi Vaibhav,

Nice to see some people are still building QEMU at IBM ;-)

On Wed, 16 Nov 2022 18:47:43 +0530
Vaibhav Jain  wrote:

> Kowshik reported that building qemu with GCC 12.2.1 for 'ppc64-softmmu'
> target is failing due to following build warnings:
> 
> 
>  ../target/ppc/cpu_init.c:7018:13: error: 'ppc_restore_state_to_opc' defined 
> but not used [-Werror=unused-function]
>  7018 | static void ppc_restore_state_to_opc(CPUState *cs,
> 
> 
> Fix this by wrapping these function definitions in 'ifdef CONFIG_TCG' so that
> they are only defined if qemu is compiled with '--enable-tcg'
> 
> Reported-by: Kowshik Jois B S 
> Signed-off-by: Vaibhav Jain 
> ---

Reviewed-by: Greg Kurz 

This was introduced by a recent commit.

Fixes: 61bd1d29421a ("target/ppc: Convert to tcg_ops restore_state_to_opc")


Vaibhav,

This is serious enough that it should get fixed in 7.2. Please file an
issue as explained in [1].

Cheers,

--
Greg

[1] https://lists.nongnu.org/archive/html/qemu-devel/2022-11/msg00137.html

>  target/ppc/cpu_init.c| 2 ++
>  target/ppc/excp_helper.c | 2 ++
>  2 files changed, 4 insertions(+)
> 
> diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
> index 32e94153d1..cbf0081374 100644
> --- a/target/ppc/cpu_init.c
> +++ b/target/ppc/cpu_init.c
> @@ -7015,6 +7015,7 @@ static vaddr ppc_cpu_get_pc(CPUState *cs)
>  return cpu->env.nip;
>  }
>  
> +#ifdef CONFIG_TCG
>  static void ppc_restore_state_to_opc(CPUState *cs,
>   const TranslationBlock *tb,
>   const uint64_t *data)
> @@ -7023,6 +7024,7 @@ static void ppc_restore_state_to_opc(CPUState *cs,
>  
>  cpu->env.nip = data[0];
>  }
> +#endif /* CONFIG_TCG */
>  
>  static bool ppc_cpu_has_work(CPUState *cs)
>  {
> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> index a05a2ed595..94adcb766b 100644
> --- a/target/ppc/excp_helper.c
> +++ b/target/ppc/excp_helper.c
> @@ -2842,6 +2842,7 @@ void helper_td(CPUPPCState *env, target_ulong arg1, 
> target_ulong arg2,
>  #endif
>  #endif
>  
> +#ifdef CONFIG_TCG
>  static uint32_t helper_SIMON_LIKE_32_64(uint32_t x, uint64_t key, uint32_t 
> lane)
>  {
>  const uint16_t c = 0xfffc;
> @@ -2924,6 +2925,7 @@ HELPER_HASH(HASHST, env->spr[SPR_HASHKEYR], true)
>  HELPER_HASH(HASHCHK, env->spr[SPR_HASHKEYR], false)
>  HELPER_HASH(HASHSTP, env->spr[SPR_HASHPKEYR], true)
>  HELPER_HASH(HASHCHKP, env->spr[SPR_HASHPKEYR], false)
> +#endif /* CONFIG_TCG */
>  
>  #if !defined(CONFIG_USER_ONLY)
>  




[PATCH for 8.0 v7 05/10] vdpa: move SVQ vring features check to net/

2022-11-16 Thread Eugenio Pérez
The next patches will start the control SVQ if possible. However, we can no
longer know at qemu boot whether that will be possible.

Since the moved checks will be already evaluated at net/ to know if it
is ok to shadow CVQ, move them.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-vdpa.c | 33 ++---
 net/vhost-vdpa.c   |  3 ++-
 2 files changed, 4 insertions(+), 32 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 3df2775760..146f0dcb40 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -402,29 +402,9 @@ static int vhost_vdpa_get_dev_features(struct vhost_dev 
*dev,
 return ret;
 }
 
-static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
-   Error **errp)
+static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v)
 {
 g_autoptr(GPtrArray) shadow_vqs = NULL;
-uint64_t dev_features, svq_features;
-int r;
-bool ok;
-
-if (!v->shadow_vqs_enabled) {
-return 0;
-}
-
-r = vhost_vdpa_get_dev_features(hdev, &dev_features);
-if (r != 0) {
-error_setg_errno(errp, -r, "Can't get vdpa device features");
-return r;
-}
-
-svq_features = dev_features;
-ok = vhost_svq_valid_features(svq_features, errp);
-if (unlikely(!ok)) {
-return -1;
-}
 
 shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
 for (unsigned n = 0; n < hdev->nvqs; ++n) {
@@ -436,7 +416,6 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
struct vhost_vdpa *v,
 }
 
 v->shadow_vqs = g_steal_pointer(&shadow_vqs);
-return 0;
 }
 
 static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
@@ -461,11 +440,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void 
*opaque, Error **errp)
 dev->opaque =  opaque ;
 v->listener = vhost_vdpa_memory_listener;
 v->msg_type = VHOST_IOTLB_MSG_V2;
-ret = vhost_vdpa_init_svq(dev, v, errp);
-if (ret) {
-goto err;
-}
-
+vhost_vdpa_init_svq(dev, v);
 vhost_vdpa_get_iova_range(v);
 
 if (!vhost_vdpa_first_dev(dev)) {
@@ -476,10 +451,6 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void 
*opaque, Error **errp)
VIRTIO_CONFIG_S_DRIVER);
 
 return 0;
-
-err:
-ram_block_discard_disable(false);
-return ret;
 }
 
 static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index e98d5f5eac..dd9cea42d0 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -117,9 +117,10 @@ static bool vhost_vdpa_net_valid_svq_features(uint64_t 
features, Error **errp)
 if (invalid_dev_features) {
 error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
invalid_dev_features);
+return false;
 }
 
-return !invalid_dev_features;
+return vhost_svq_valid_features(features, errp);
 }
 
 static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
-- 
2.31.1




[PATCH for 8.0 v7 06/10] vdpa: Allocate SVQ unconditionally

2022-11-16 Thread Eugenio Pérez
SVQ may run or not in a device depending on runtime conditions (for
example, if the device can move CVQ to its own group or not).

Allocate the SVQ array unconditionally at startup, since it's hard to
move this allocation elsewhere.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-vdpa.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 146f0dcb40..23efb8f49d 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -547,10 +547,6 @@ static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
 struct vhost_vdpa *v = dev->opaque;
 size_t idx;
 
-if (!v->shadow_vqs) {
-return;
-}
-
 for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
 vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
 }
-- 
2.31.1




[PATCH for 8.0 v7 04/10] vdpa: add vhost_vdpa_net_valid_svq_features

2022-11-16 Thread Eugenio Pérez
It will be reused at vdpa device start, so let's extract it into its own function.

Signed-off-by: Eugenio Pérez 
Acked-by: Jason Wang 
---
 net/vhost-vdpa.c | 26 +-
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 6811089231..e98d5f5eac 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -106,6 +106,22 @@ VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
 return s->vhost_net;
 }
 
+static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp)
+{
+uint64_t invalid_dev_features =
+features & ~vdpa_svq_device_features &
+/* Transport are all accepted at this point */
+~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
+ VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);
+
+if (invalid_dev_features) {
+error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
+   invalid_dev_features);
+}
+
+return !invalid_dev_features;
+}
+
 static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
 {
 uint32_t device_id;
@@ -675,15 +691,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 if (opts->x_svq) {
 struct vhost_vdpa_iova_range iova_range;
 
-uint64_t invalid_dev_features =
-features & ~vdpa_svq_device_features &
-/* Transport are all accepted at this point */
-~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
- VIRTIO_TRANSPORT_F_END - 
VIRTIO_TRANSPORT_F_START);
-
-if (invalid_dev_features) {
-error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
-   invalid_dev_features);
+if (!vhost_vdpa_net_valid_svq_features(features, errp)) {
 goto err_svq;
 }
 
-- 
2.31.1




[PATCH for 8.0 v7 07/10] vdpa: Add asid parameter to vhost_vdpa_dma_map/unmap

2022-11-16 Thread Eugenio Pérez
So the caller can choose which ASID is destined.

No need to update the batch functions as they will always be called from
memory listener updates at the moment. Memory listener updates will
always update ASID 0, as it's the passthrough ASID.

All vhost devices' ASIDs are 0 at this moment.

Signed-off-by: Eugenio Pérez 
---
v7:
* Move comment on zero initialization of vhost_vdpa_dma_map above the
  functions.
* Add VHOST_VDPA_GUEST_PA_ASID macro.

v5:
* Solve conflict, now vhost_vdpa_svq_unmap_ring returns void
* Change comment on zero initialization.

v4: Add comment specifying behavior if device does not support _F_ASID

v3: Deleted unneeded space
---
 include/hw/virtio/vhost-vdpa.h | 14 ++---
 hw/virtio/vhost-vdpa.c | 36 +++---
 net/vhost-vdpa.c   |  6 +++---
 hw/virtio/trace-events |  4 ++--
 4 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index d85643..e57dfa1fd1 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -19,6 +19,12 @@
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"
 
+/*
+ * ASID dedicated to map guest's addresses.  If SVQ is disabled it maps GPA to
+ * qemu's IOVA.  If SVQ is enabled it maps also the SVQ vring here
+ */
+#define VHOST_VDPA_GUEST_PA_ASID 0
+
 typedef struct VhostVDPAHostNotifier {
 MemoryRegion mr;
 void *addr;
@@ -29,6 +35,7 @@ typedef struct vhost_vdpa {
 int index;
 uint32_t msg_type;
 bool iotlb_batch_begin_sent;
+uint32_t address_space_id;
 MemoryListener listener;
 struct vhost_vdpa_iova_range iova_range;
 uint64_t acked_features;
@@ -42,8 +49,9 @@ typedef struct vhost_vdpa {
 VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
 
-int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
-   void *vaddr, bool readonly);
-int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size);
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
+   hwaddr size, void *vaddr, bool readonly);
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
+ hwaddr size);
 
 #endif
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 23efb8f49d..1e4e1cb523 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -72,22 +72,28 @@ static bool 
vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
 return false;
 }
 
-int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
-   void *vaddr, bool readonly)
+/*
+ * The caller must set asid = 0 if the device does not support asid.
+ * This is not an ABI break since it is set to 0 by the initializer anyway.
+ */
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
+   hwaddr size, void *vaddr, bool readonly)
 {
 struct vhost_msg_v2 msg = {};
 int fd = v->device_fd;
 int ret = 0;
 
 msg.type = v->msg_type;
+msg.asid = asid;
 msg.iotlb.iova = iova;
 msg.iotlb.size = size;
 msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
 msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
 msg.iotlb.type = VHOST_IOTLB_UPDATE;
 
-   trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size,
-msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type);
+trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.asid, msg.iotlb.iova,
+ msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm,
+ msg.iotlb.type);
 
 if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
 error_report("failed to write, fd=%d, errno=%d (%s)",
@@ -98,18 +104,24 @@ int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, 
hwaddr size,
 return ret;
 }
 
-int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size)
+/*
+ * The caller must set asid = 0 if the device does not support asid.
+ * This is not an ABI break since it is set to 0 by the initializer anyway.
+ */
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
+ hwaddr size)
 {
 struct vhost_msg_v2 msg = {};
 int fd = v->device_fd;
 int ret = 0;
 
 msg.type = v->msg_type;
+msg.asid = asid;
 msg.iotlb.iova = iova;
 msg.iotlb.size = size;
 msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
 
-trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova,
+trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.asid, msg.iotlb.iova,
msg.iotlb.size, msg.iotlb.type);
 
 if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
@@ -229,8 +241,8 @@ static void vhost_vdpa_listener_region_add(MemoryListener 
*listener,
 }
 
 vhost_vdpa_iotlb_batch_begin_once(v);
-ret = vhost_vdpa_dma_map(v, iova, 

[PATCH for 8.0 v7 02/10] vhost: set SVQ device call handler at SVQ start

2022-11-16 Thread Eugenio Pérez
By the end of this series CVQ is shadowed as long as the features
support it.

Since we don't know at the beginning of qemu running if this is
supported, move the event notifier handler setting to the start of the
SVQ, instead of the start of qemu run.

Signed-off-by: Eugenio Pérez 
Acked-by: Jason Wang 
---
 hw/virtio/vhost-shadow-virtqueue.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 5bd14cad96..264ddc166d 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -648,6 +648,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, 
VirtIODevice *vdev,
 {
 size_t desc_size, driver_size, device_size;
 
+event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
 svq->next_guest_avail_elem = NULL;
 svq->shadow_avail_idx = 0;
 svq->shadow_used_idx = 0;
@@ -704,6 +705,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
 g_free(svq->desc_state);
 qemu_vfree(svq->vring.desc);
 qemu_vfree(svq->vring.used);
+event_notifier_set_handler(&svq->hdev_call, NULL);
 }
 
 /**
@@ -740,7 +742,6 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree 
*iova_tree,
 }
 
 event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
-event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
 svq->iova_tree = iova_tree;
 svq->ops = ops;
 svq->ops_opaque = ops_opaque;
@@ -763,7 +764,6 @@ void vhost_svq_free(gpointer pvq)
 VhostShadowVirtqueue *vq = pvq;
 vhost_svq_stop(vq);
 event_notifier_cleanup(&vq->hdev_kick);
-event_notifier_set_handler(&vq->hdev_call, NULL);
 event_notifier_cleanup(&vq->hdev_call);
 g_free(vq);
 }
-- 
2.31.1




[PATCH for 8.0 v7 00/10] ASID support in vhost-vdpa net

2022-11-16 Thread Eugenio Pérez
Control VQ is the mechanism net devices use to communicate changes to the
device state, like the number of active queues or the MAC address.

QEMU needs to intercept this queue so it can track these changes and is able to
migrate the device. It has been able to do so since 1576dbb5bbc4 ("vdpa: Add
x-svq to NetdevVhostVDPAOptions"). However, enabling x-svq implies shadowing all
of the VirtIO device's virtqueues, which hurts performance.

This series adds address space isolation, so the device and the guest
communicate directly with each other (passthrough) and CVQ communication is
split in two: The guest communicates with QEMU and QEMU forwards the commands
to the device.

This series adds new features, so it is targeted for QEMU 8.0.

Comments are welcome. Thanks!

v7:
- Never ask for number of address spaces, just react if isolation is not
  possible.
- Return ASID ioctl errors instead of masking them as if the device has
  no asid.
- Rename listener_shadow_vq to shadow_data
- Move comment on zero initialization of vhost_vdpa_dma_map above the
  functions.
- Add VHOST_VDPA_GUEST_PA_ASID macro.

v6:
- Do not allocate SVQ resources like file descriptors if SVQ cannot be used.
- Disable shadow CVQ if the device does not support it because of net
  features.

v5:
- Move vring state in vhost_vdpa_get_vring_group instead of using a
  parameter.
- Rename VHOST_VDPA_NET_CVQ_PASSTHROUGH to VHOST_VDPA_NET_DATA_ASID

v4:
- Rebased on last CVQ start series, that allocated CVQ cmd bufs at load
- Squash vhost_vdpa_cvq_group_is_independent.
- Do not check for cvq index on vhost_vdpa_net_prepare, we only have that one
  callback registered in that NetClientInfo.
- Add comment specifying behavior if device does not support _F_ASID
- Update headers to a later Linux commit so as not to remove SETUP_RNG_SEED

v3:
- Do not return an error but just print a warning if vdpa device initialization
  returns failure while getting AS num of VQ groups
- Delete extra newline

v2:
- Much as commented on series [1], handle vhost_net backend through
  NetClientInfo callbacks instead of directly.
- Fix not freeing SVQ properly when device does not support CVQ
- Add BIT_ULL missed checking device's backend feature for _F_ASID.

Eugenio Pérez (10):
  vdpa: Use v->shadow_vqs_enabled in vhost_vdpa_svqs_start & stop
  vhost: set SVQ device call handler at SVQ start
  vhost: Allocate SVQ device file descriptors at device start
  vdpa: add vhost_vdpa_net_valid_svq_features
  vdpa: move SVQ vring features check to net/
  vdpa: Allocate SVQ unconditionally
  vdpa: Add asid parameter to vhost_vdpa_dma_map/unmap
  vdpa: Store x-svq parameter in VhostVDPAState
  vdpa: Add shadow_data to vhost_vdpa
  vdpa: Always start CVQ in SVQ mode if possible

 include/hw/virtio/vhost-vdpa.h |  16 ++-
 hw/virtio/vhost-shadow-virtqueue.c |  35 +--
 hw/virtio/vhost-vdpa.c | 121 ---
 net/vhost-vdpa.c   | 152 ++---
 hw/virtio/trace-events |   4 +-
 5 files changed, 217 insertions(+), 111 deletions(-)

-- 
2.31.1





[PATCH for 8.0 v7 08/10] vdpa: Store x-svq parameter in VhostVDPAState

2022-11-16 Thread Eugenio Pérez
CVQ can be shadowed two ways:
- Device has x-svq=on parameter (current way)
- The device can isolate CVQ in its own vq group

QEMU needs to check for the second condition dynamically, because CVQ
index is not known at initialization time. Since this is dynamic, the
CVQ isolation could vary with different conditions, making it possible
to go from "not isolated group" to "isolated".

Save the cmdline parameter in an extra field so we never disable CVQ
SVQ if the device was started with x-svq=on in the cmdline.

Signed-off-by: Eugenio Pérez 
---
 net/vhost-vdpa.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 89b01fcaec..5185ac7042 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -38,6 +38,8 @@ typedef struct VhostVDPAState {
 void *cvq_cmd_out_buffer;
 virtio_net_ctrl_ack *status;
 
+/* The device always has SVQ enabled */
+bool always_svq;
 bool started;
 } VhostVDPAState;
 
@@ -566,6 +568,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 
 s->vhost_vdpa.device_fd = vdpa_device_fd;
 s->vhost_vdpa.index = queue_pair_index;
+s->always_svq = svq;
 s->vhost_vdpa.shadow_vqs_enabled = svq;
 s->vhost_vdpa.iova_tree = iova_tree;
 if (!is_datapath) {
-- 
2.31.1




[PATCH for 8.0 v7 10/10] vdpa: Always start CVQ in SVQ mode if possible

2022-11-16 Thread Eugenio Pérez
Isolate control virtqueue in its own group, allowing to intercept control
commands but letting dataplane run totally passthrough to the guest.

Signed-off-by: Eugenio Pérez 
---
v7:
* Never ask for number of address spaces, just react if isolation is not
  possible.
* Return ASID ioctl errors instead of masking them as if the device has
  no asid.
* Simplify net_init_vhost_vdpa logic
* Add "if possible" suffix

v6:
* Disable control SVQ if the device does not support it because of
features.

v5:
* Fix cvq buffers not being added when x-svq=on is specified.
* Move vring state in vhost_vdpa_get_vring_group instead of using a
  parameter.
* Rename VHOST_VDPA_NET_CVQ_PASSTHROUGH to VHOST_VDPA_NET_DATA_ASID

v4:
* Squash vhost_vdpa_cvq_group_is_independent.
* Rebased on last CVQ start series, that allocated CVQ cmd bufs at load
* Do not check for cvq index on vhost_vdpa_net_prepare, we only have that one
  callback registered in that NetClientInfo.

v3:
* Make asid related queries print a warning instead of returning an
  error and stop the start of qemu.
---
 hw/virtio/vhost-vdpa.c |   3 +-
 net/vhost-vdpa.c   | 117 +++--
 2 files changed, 114 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 852baf8b2c..a29a18a6a9 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -653,7 +653,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
 {
 uint64_t features;
 uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
-0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
+0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
+0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
 int r;
 
 if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index a9c864741a..dc13a49311 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -101,6 +101,8 @@ static const uint64_t vdpa_svq_device_features =
 BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
 BIT_ULL(VIRTIO_NET_F_STANDBY);
 
+#define VHOST_VDPA_NET_CVQ_ASID 1
+
 VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
@@ -242,6 +244,40 @@ static NetClientInfo net_vhost_vdpa_info = {
 .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
+static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
+{
+struct vhost_vring_state state = {
+.index = vq_index,
+};
+int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);
+
+if (unlikely(r < 0)) {
+error_report("Cannot get VQ %u group: %s", vq_index,
+ g_strerror(errno));
+return r;
+}
+
+return state.num;
+}
+
+static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
+   unsigned vq_group,
+   unsigned asid_num)
+{
+struct vhost_vring_state asid = {
+.index = vq_group,
+.num = asid_num,
+};
+int r;
+
+r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid);
+if (unlikely(r < 0)) {
+error_report("Can't set vq group %u asid %u, errno=%d (%s)",
+ asid.index, asid.num, errno, g_strerror(errno));
+}
+return r;
+}
+
 static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
 {
 VhostIOVATree *tree = v->iova_tree;
@@ -316,11 +352,69 @@ dma_map_err:
 static int vhost_vdpa_net_cvq_start(NetClientState *nc)
 {
 VhostVDPAState *s;
-int r;
+struct vhost_vdpa *v;
+uint64_t backend_features;
+int64_t cvq_group;
+int cvq_index, r;
 
 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 
 s = DO_UPCAST(VhostVDPAState, nc, nc);
+v = &s->vhost_vdpa;
+
+v->shadow_data = s->always_svq;
+v->shadow_vqs_enabled = s->always_svq;
+s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
+
+if (s->always_svq) {
+goto out;
+}
+
+/* Backend features are not available in v->dev yet. */
+r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
+if (unlikely(r < 0)) {
+error_report("Cannot get vdpa backend_features: %s(%d)",
+g_strerror(errno), errno);
+return -1;
+}
+if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) ||
+!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
+return 0;
+}
+
+/**
+ * Check if all the virtqueues of the virtio device are in a different vq
+ * group than the last vq. The vq group of the last vq is stored in cvq_group.
+ */
+cvq_index = v->dev->vq_index_end - 1;
+cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index);
+if (unlikely(cvq_group < 0)) {
+return cvq_group;
+}
+for (int i = 0; i < cvq_index; ++i) {
+int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i);
+
+if (unlikely(group < 0)) {
+return group;
+}
+
+

[PATCH for 8.0 v7 09/10] vdpa: Add shadow_data to vhost_vdpa

2022-11-16 Thread Eugenio Pérez
The memory listener that tells the device how to convert GPA to qemu's
va is registered against CVQ vhost_vdpa. Memory listener translations
are always ASID 0, CVQ ones are ASID 1 if supported.

Let's tell the listener if it needs to register them on iova tree or
not.

Signed-off-by: Eugenio Pérez 
---
v7: Rename listener_shadow_vq to shadow_data
v5: Solve conflict about vhost_iova_tree_remove accepting mem_region by
value.
---
 include/hw/virtio/vhost-vdpa.h | 2 ++
 hw/virtio/vhost-vdpa.c | 6 +++---
 net/vhost-vdpa.c   | 1 +
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index e57dfa1fd1..45b969a311 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -40,6 +40,8 @@ typedef struct vhost_vdpa {
 struct vhost_vdpa_iova_range iova_range;
 uint64_t acked_features;
 bool shadow_vqs_enabled;
+/* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */
+bool shadow_data;
 /* IOVA mapping used by the Shadow Virtqueue */
 VhostIOVATree *iova_tree;
 GPtrArray *shadow_vqs;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 1e4e1cb523..852baf8b2c 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -224,7 +224,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener 
*listener,
  vaddr, section->readonly);
 
 llsize = int128_sub(llend, int128_make64(iova));
-if (v->shadow_vqs_enabled) {
+if (v->shadow_data) {
 int r;
 
 mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr,
@@ -251,7 +251,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener 
*listener,
 return;
 
 fail_map:
-if (v->shadow_vqs_enabled) {
+if (v->shadow_data) {
 vhost_iova_tree_remove(v->iova_tree, mem_region);
 }
 
@@ -296,7 +296,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener 
*listener,
 
 llsize = int128_sub(llend, int128_make64(iova));
 
-if (v->shadow_vqs_enabled) {
+if (v->shadow_data) {
 const DMAMap *result;
 const void *vaddr = memory_region_get_ram_ptr(section->mr) +
 section->offset_within_region +
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 5185ac7042..a9c864741a 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -570,6 +570,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 s->vhost_vdpa.index = queue_pair_index;
 s->always_svq = svq;
 s->vhost_vdpa.shadow_vqs_enabled = svq;
+s->vhost_vdpa.shadow_data = svq;
 s->vhost_vdpa.iova_tree = iova_tree;
 if (!is_datapath) {
 s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
-- 
2.31.1




[PATCH for 8.0 v7 01/10] vdpa: Use v->shadow_vqs_enabled in vhost_vdpa_svqs_start & stop

2022-11-16 Thread Eugenio Pérez
These functions used to rely on v->shadow_vqs != NULL to know whether they
must start or stop svq.

This is not going to be valid anymore, as qemu is going to allocate svq
unconditionally (but it will only start them conditionally).

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-vdpa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 7468e44b87..7f0ff4df5b 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1029,7 +1029,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
 Error *err = NULL;
 unsigned i;
 
-if (!v->shadow_vqs) {
+if (!v->shadow_vqs_enabled) {
 return true;
 }
 
@@ -1082,7 +1082,7 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev)
 {
 struct vhost_vdpa *v = dev->opaque;
 
-if (!v->shadow_vqs) {
+if (!v->shadow_vqs_enabled) {
 return;
 }
 
-- 
2.31.1




  1   2   3   >