date:20221012

Re: [PATCH v3 2/3] block: introduce zone append write for zoned devices

2022-10-12 Thread Damien Le Moal

On 10/10/22 11:33, Sam Li wrote:
> A zone append command is a write operation that specifies the first
> logical block of a zone as the write position. When writing to a zoned
> block device using zone append, the byte offset of writes is pointing
> to the write pointer of that zone. Upon completion the device will
> respond with the position the data has been written in the zone.
> 
> Signed-off-by: Sam Li 
> ---
>  block/block-backend.c | 64 +++
>  block/file-posix.c| 64 ---
>  block/io.c| 21 ++
>  block/raw-format.c|  7 
>  include/block/block-io.h  |  3 ++
>  include/block/block_int-common.h  |  3 ++
>  include/block/raw-aio.h   |  4 +-
>  include/sysemu/block-backend-io.h |  9 +
>  8 files changed, 168 insertions(+), 7 deletions(-)
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index ddc569e3ac..bfdb719bc8 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -1439,6 +1439,9 @@ typedef struct BlkRwCo {
>  struct {
>  BlockZoneOp op;
>  } zone_mgmt;
> +struct {
> +int64_t *append_sector;

I would call this "sector", since it will always be referenced as
"->zone_append.sector", you get the "append" for free :)

That said, shouldn't this be a byte value, so called "offset" ? Not
entirely sure...

> +} zone_append;
>  };
>  } BlkRwCo;
>  
> @@ -1869,6 +1872,46 @@ BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, 
> BlockZoneOp op,
>  return >common;
>  }
>  
> +static void coroutine_fn blk_aio_zone_append_entry(void *opaque) {
> +BlkAioEmAIOCB *acb = opaque;
> +BlkRwCo *rwco = &acb->rwco;
> +
> +rwco->ret = blk_co_zone_append(rwco->blk, 
> rwco->zone_append.append_sector,
> +   rwco->iobuf, rwco->flags);
> +blk_aio_complete(acb);
> +}
> +
> +BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
> +QEMUIOVector *qiov, BdrvRequestFlags flags,
> +BlockCompletionFunc *cb, void *opaque) {
> +BlkAioEmAIOCB *acb;
> +Coroutine *co;
> +IO_CODE();
> +
> +blk_inc_in_flight(blk);
> +acb = blk_aio_get(_aio_em_aiocb_info, blk, cb, opaque);
> +acb->rwco = (BlkRwCo) {
> +.blk= blk,
> +.ret= NOT_DONE,
> +.flags  = flags,
> +.iobuf  = qiov,
> +.zone_append = {
> +.append_sector = offset,

See above comment. So since this is a byte value, this needs to be
called "offset", no ?

> +},
> +};
> +acb->has_returned = false;
> +
> +co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
> +bdrv_coroutine_enter(blk_bs(blk), co);
> +acb->has_returned = true;
> +if (acb->rwco.ret != NOT_DONE) {
> +replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
> + blk_aio_complete_bh, acb);
> +}
> +
> +return &acb->common;
> +}
> +
>  /*
>   * Send a zone_report command.
>   * offset is a byte offset from the start of the device. No alignment
> @@ -1921,6 +1964,27 @@ int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, 
> BlockZoneOp op,
>  return ret;
>  }
>  
> +/*
> + * Send a zone_append command.
> + */
> +int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
> +QEMUIOVector *qiov, BdrvRequestFlags flags)
> +{
> +int ret;
> +IO_CODE();
> +
> +blk_inc_in_flight(blk);
> +blk_wait_while_drained(blk);
> +if (!blk_is_available(blk)) {
> +blk_dec_in_flight(blk);
> +return -ENOMEDIUM;
> +}
> +
> +ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags);
> +blk_dec_in_flight(blk);
> +return ret;
> +}
> +
>  void blk_drain(BlockBackend *blk)
>  {
>  BlockDriverState *bs = blk_bs(blk);
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 17c0b58158..08ab164df4 100755
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1657,7 +1657,7 @@ static ssize_t handle_aiocb_rw_vector(RawPosixAIOData 
> *aiocb)
>  ssize_t len;
>  
>  do {
> -if (aiocb->aio_type & QEMU_AIO_WRITE)
> +if (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND))
>  len = qemu_pwritev(aiocb->aio_fildes,
> aiocb->io.iov,
> aiocb->io.niov,

Hu... You are issuing the io for a zone append without first changing
the aiocb offset to be equal to the zone write pointer ? And you are
calling this without the wps->lock held... Changing the aio offset to be
equal to the wp && issuing the io must be atomic.

> @@ -1687,7 +1687,7 @@ static ssize_t handle_aiocb_rw_linear(RawPosixAIOData 
> *aiocb, char *buf)
>  ssize_t len;
>  
>  while (offset < aiocb->aio_nbytes) {
> -if (aiocb->aio_type &

[PATCH] tests/qtest/libqos/e1000e: Use e1000_regs.h

2022-10-12 Thread Akihiko Odaki

The register definitions in tests/qtest/libqos/e1000e.c had names
different from hw/net/e1000_regs.h, which made it hard to understand
which test code corresponds to the implementation. Use
hw/net/e1000_regs.h from tests/qtest/libqos/e1000e.c to remove
these duplications.

E1000E_CTRL_EXT_TXLSFLOW is removed from E1000E_CTRL_EXT settings
because hw/net/e1000_regs.h does not have the definition and it is for
TCP segmentation offload, which does not matter for the implemented
tests.

Signed-off-by: Akihiko Odaki 
---
 hw/net/e1000_regs.h |   1 +
 tests/qtest/libqos/e1000e.c | 119 +---
 2 files changed, 45 insertions(+), 75 deletions(-)

diff --git a/hw/net/e1000_regs.h b/hw/net/e1000_regs.h
index e274c4fcd2..170452e0af 100644
--- a/hw/net/e1000_regs.h
+++ b/hw/net/e1000_regs.h
@@ -793,6 +793,7 @@
 #define E1000_CTRL_EXT_ASDCHK  0x1000 /* auto speed detection check */
 #define E1000_CTRL_EXT_EE_RST  0x2000 /* EEPROM reset */
 #define E1000_CTRL_EXT_LINK_EN 0x0001 /* enable link status from external 
LINK_0 and LINK_1 pins */
+#define E1000_CTRL_EXT_DRV_LOAD 0x10000000 /* Driver loaded bit for FW */
 #define E1000_CTRL_EXT_EIAME   0x0100
 #define E1000_CTRL_EXT_IAME0x0800 /* Int ACK Auto-mask */
 #define E1000_CTRL_EXT_PBA_CLR 0x8000 /* PBA Clear */
diff --git a/tests/qtest/libqos/e1000e.c b/tests/qtest/libqos/e1000e.c
index f87e0e84b2..9b6bb17565 100644
--- a/tests/qtest/libqos/e1000e.c
+++ b/tests/qtest/libqos/e1000e.c
@@ -17,6 +17,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "hw/net/e1000_regs.h"
 #include "../libqtest.h"
 #include "pci-pc.h"
 #include "qemu/sockets.h"
@@ -27,49 +28,13 @@
 #include "qgraph.h"
 #include "e1000e.h"
 
-#define E1000E_IMS  (0x00d0)
+#define E1000E_IVAR_TEST_CFG \
+(E1000E_RX0_MSG_ID | E1000_IVAR_INT_ALLOC_VALID | \
+ ((E1000E_TX0_MSG_ID | E1000_IVAR_INT_ALLOC_VALID) << 8)| \
+ ((E1000E_OTHER_MSG_ID | E1000_IVAR_INT_ALLOC_VALID) << 16) | \
+ E1000_IVAR_TX_INT_EVERY_WB)
 
-#define E1000E_STATUS   (0x0008)
-#define E1000E_STATUS_LU BIT(1)
-#define E1000E_STATUS_ASDV1000 BIT(9)
-
-#define E1000E_CTRL (0x0000)
-#define E1000E_CTRL_RESET BIT(26)
-
-#define E1000E_RCTL (0x0100)
-#define E1000E_RCTL_EN  BIT(1)
-#define E1000E_RCTL_UPE BIT(3)
-#define E1000E_RCTL_MPE BIT(4)
-
-#define E1000E_RFCTL (0x5008)
-#define E1000E_RFCTL_EXTEN  BIT(15)
-
-#define E1000E_TCTL (0x0400)
-#define E1000E_TCTL_EN  BIT(1)
-
-#define E1000E_CTRL_EXT (0x0018)
-#define E1000E_CTRL_EXT_DRV_LOADBIT(28)
-#define E1000E_CTRL_EXT_TXLSFLOWBIT(22)
-
-#define E1000E_IVAR (0x00E4)
-#define E1000E_IVAR_TEST_CFG((E1000E_RX0_MSG_ID << 0)| BIT(3)  | \
- (E1000E_TX0_MSG_ID << 8)| BIT(11) | \
- (E1000E_OTHER_MSG_ID << 16) | BIT(19) | \
- BIT(31))
-
-#define E1000E_RING_LEN (0x1000)
-
-#define E1000E_TDBAL(0x3800)
-
-#define E1000E_TDBAH(0x3804)
-#define E1000E_TDH  (0x3810)
-
-#define E1000E_RDBAL(0x2800)
-#define E1000E_RDBAH(0x2804)
-#define E1000E_RDH  (0x2810)
-
-#define E1000E_TXD_LEN  (16)
-#define E1000E_RXD_LEN  (16)
+#define E1000E_RING_LEN (0x1000)
 
 static void e1000e_macreg_write(QE1000E *d, uint32_t reg, uint32_t val)
 {
@@ -87,30 +52,34 @@ void e1000e_tx_ring_push(QE1000E *d, void *descr)
 {
 QE1000E_PCI *d_pci = container_of(d, QE1000E_PCI, e1000e);
 uint32_t tail = e1000e_macreg_read(d, E1000E_TDT);
-uint32_t len = e1000e_macreg_read(d, E1000E_TDLEN) / E1000E_TXD_LEN;
+uint32_t len = e1000e_macreg_read(d, E1000E_TDLEN) / E1000_RING_DESC_LEN;
 
-qtest_memwrite(d_pci->pci_dev.bus->qts, d->tx_ring + tail * E1000E_TXD_LEN,
-   descr, E1000E_TXD_LEN);
+qtest_memwrite(d_pci->pci_dev.bus->qts,
+   d->tx_ring + tail * E1000_RING_DESC_LEN,
+   descr, E1000_RING_DESC_LEN);
 e1000e_macreg_write(d, E1000E_TDT, (tail + 1) % len);
 
 /* Read WB data for the packet transmitted */
-qtest_memread(d_pci->pci_dev.bus->qts, d->tx_ring + tail * E1000E_TXD_LEN,
-  descr, E1000E_TXD_LEN);
+qtest_memread(d_pci->pci_dev.bus->qts,
+  d->tx_ring + tail * E1000_RING_DESC_LEN,
+  descr, E1000_RING_DESC_LEN);
 }
 
 void e1000e_rx_ring_push(QE1000E *d, void *descr)
 {
 QE1000E_PCI *d_pci = container_of(d, QE1000E_PCI, e1000e);
 uint32_t tail = e1000e_macreg_read(d, E1000E_RDT);
-uint32_t len = e1000e_macreg_read(d, E1000E_RDLEN) / E1000E_RXD_LEN;
+uint32_t len = e1000e_macreg_read(d, E1000E_RDLEN) / E1000_RING_DESC_LEN;
 
-qtest_memwrite(d_pci->pci_dev.bus->qts, d->rx_ring + tail * E1000E_RXD_LEN,
-   descr, E1000E_RXD_LEN);
+qtest_memwrite(d_pci->pci_dev.bus->qts,
+   d->rx_ring + tail * E1000_RING_DESC_LEN,

Re: [PATCH v11 3/7] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls

2022-10-12 Thread Damien Le Moal

On 10/13/22 14:33, Sam Li wrote:
> Damien Le Moal  于2022年10月13日周四 12:41写道：
>>
>> On 10/10/22 11:21, Sam Li wrote:
>>> Add a new zoned_host_device BlockDriver. The zoned_host_device option
>>> accepts only zoned host block devices. By adding zone management
>>> operations in this new BlockDriver, users can use the new block
>>> layer APIs including Report Zone and four zone management operations
>>> (open, close, finish, reset, reset_all).
>>>
>>> Qemu-io uses the new APIs to perform zoned storage commands of the device:
>>> zone_report(zrp), zone_open(zo), zone_close(zc), zone_reset(zrs),
>>> zone_finish(zf).
>>>
>>> For example, to test zone_report, use following command:
>>> $ ./build/qemu-io --image-opts -n driver=zoned_host_device, 
>>> filename=/dev/nullb0
>>> -c "zrp offset nr_zones"
>>>
>>> Signed-off-by: Sam Li 
>>> Reviewed-by: Hannes Reinecke 
>>> ---
>>>  block/block-backend.c | 146 +
>>>  block/file-posix.c| 329 ++
>>>  block/io.c|  41 
>>>  include/block/block-common.h  |   1 +
>>>  include/block/block-io.h  |   7 +
>>>  include/block/block_int-common.h  |  24 +++
>>>  include/block/raw-aio.h   |   6 +-
>>>  include/sysemu/block-backend-io.h |  17 ++
>>>  meson.build   |   4 +
>>>  qapi/block-core.json  |   8 +-
>>>  qemu-io-cmds.c| 148 ++
>>>  11 files changed, 728 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/block/block-backend.c b/block/block-backend.c
>>> index d4a5df2ac2..ddc569e3ac 100644
>>> --- a/block/block-backend.c
>>> +++ b/block/block-backend.c
>>> @@ -1431,6 +1431,15 @@ typedef struct BlkRwCo {
>>>  void *iobuf;
>>>  int ret;
>>>  BdrvRequestFlags flags;
>>> +union {
>>> +struct {
>>> +unsigned int *nr_zones;
>>> +BlockZoneDescriptor *zones;
>>> +} zone_report;
>>> +struct {
>>> +BlockZoneOp op;
>>> +} zone_mgmt;
>>> +};
>>>  } BlkRwCo;
>>>
>>>  int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
>>> @@ -1775,6 +1784,143 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
>>>  return ret;
>>>  }
>>>
>>> +static void coroutine_fn blk_aio_zone_report_entry(void *opaque) {
>>> +BlkAioEmAIOCB *acb = opaque;
>>> +BlkRwCo *rwco = >rwco;
>>> +
>>> +rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset,
>>> +   rwco->zone_report.nr_zones,
>>> +   rwco->zone_report.zones);
>>> +blk_aio_complete(acb);
>>> +}
>>> +
>>> +BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
>>> +unsigned int *nr_zones,
>>> +BlockZoneDescriptor  *zones,
>>> +BlockCompletionFunc *cb, void *opaque)
>>> +{
>>> +BlkAioEmAIOCB *acb;
>>> +Coroutine *co;
>>> +IO_CODE();
>>> +
>>> +blk_inc_in_flight(blk);
>>> +acb = blk_aio_get(_aio_em_aiocb_info, blk, cb, opaque);
>>> +acb->rwco = (BlkRwCo) {
>>> +.blk= blk,
>>> +.offset = offset,
>>> +.ret= NOT_DONE,
>>> +.zone_report = {
>>> +.zones = zones,
>>> +.nr_zones = nr_zones,
>>> +},
>>> +};
>>> +acb->has_returned = false;
>>> +
>>> +co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
>>> +bdrv_coroutine_enter(blk_bs(blk), co);
>>> +
>>> +acb->has_returned = true;
>>> +if (acb->rwco.ret != NOT_DONE) {
>>> +replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
>>> + blk_aio_complete_bh, acb);
>>> +}
>>> +
>>> +return >common;
>>> +}
>>> +
>>> +static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque) {
>>> +BlkAioEmAIOCB *acb = opaque;
>>> +BlkRwCo *rwco = >rwco;
>>> +
>>> +rwco->ret = blk_co_zone_mgmt(rwco->blk, rwco->zone_mgmt.op,
>>> + rwco->offset, acb->bytes);
>>> +blk_aio_complete(acb);
>>> +}
>>> +
>>> +BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
>>> +  int64_t offset, int64_t len,
>>> +  BlockCompletionFunc *cb, void *opaque) {
>>> +BlkAioEmAIOCB *acb;
>>> +Coroutine *co;
>>> +IO_CODE();
>>> +
>>> +blk_inc_in_flight(blk);
>>> +acb = blk_aio_get(_aio_em_aiocb_info, blk, cb, opaque);
>>> +acb->rwco = (BlkRwCo) {
>>> +.blk= blk,
>>> +.offset = offset,
>>> +.ret= NOT_DONE,
>>> +.zone_mgmt = {
>>> +.op = op,
>>> +},
>>> +};
>>> +acb->bytes = len;
>>> +acb->has_returned = false;
>>> +
>>> +co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
>>> +bdrv_coroutine_enter(blk_bs(blk), co);
>>> +
>>> +acb->has_returned = true;
>>> +if (acb->rwco.ret != NOT_DONE) {
>>> +

Re: [PATCH v11 3/7] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls

2022-10-12 Thread Sam Li

Damien Le Moal  于2022年10月13日周四 13:45写道：
>
> On 10/13/22 14:33, Sam Li wrote:
> > Damien Le Moal  于2022年10月13日周四 12:41写道：
> >>
> >> On 10/10/22 11:21, Sam Li wrote:
> >>> Add a new zoned_host_device BlockDriver. The zoned_host_device option
> >>> accepts only zoned host block devices. By adding zone management
> >>> operations in this new BlockDriver, users can use the new block
> >>> layer APIs including Report Zone and four zone management operations
> >>> (open, close, finish, reset, reset_all).
> >>>
> >>> Qemu-io uses the new APIs to perform zoned storage commands of the device:
> >>> zone_report(zrp), zone_open(zo), zone_close(zc), zone_reset(zrs),
> >>> zone_finish(zf).
> >>>
> >>> For example, to test zone_report, use following command:
> >>> $ ./build/qemu-io --image-opts -n driver=zoned_host_device, 
> >>> filename=/dev/nullb0
> >>> -c "zrp offset nr_zones"
> >>>
> >>> Signed-off-by: Sam Li 
> >>> Reviewed-by: Hannes Reinecke 
> >>> ---
> >>>  block/block-backend.c | 146 +
> >>>  block/file-posix.c| 329 ++
> >>>  block/io.c|  41 
> >>>  include/block/block-common.h  |   1 +
> >>>  include/block/block-io.h  |   7 +
> >>>  include/block/block_int-common.h  |  24 +++
> >>>  include/block/raw-aio.h   |   6 +-
> >>>  include/sysemu/block-backend-io.h |  17 ++
> >>>  meson.build   |   4 +
> >>>  qapi/block-core.json  |   8 +-
> >>>  qemu-io-cmds.c| 148 ++
> >>>  11 files changed, 728 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/block/block-backend.c b/block/block-backend.c
> >>> index d4a5df2ac2..ddc569e3ac 100644
> >>> --- a/block/block-backend.c
> >>> +++ b/block/block-backend.c
> >>> @@ -1431,6 +1431,15 @@ typedef struct BlkRwCo {
> >>>  void *iobuf;
> >>>  int ret;
> >>>  BdrvRequestFlags flags;
> >>> +union {
> >>> +struct {
> >>> +unsigned int *nr_zones;
> >>> +BlockZoneDescriptor *zones;
> >>> +} zone_report;
> >>> +struct {
> >>> +BlockZoneOp op;
> >>> +} zone_mgmt;
> >>> +};
> >>>  } BlkRwCo;
> >>>
> >>>  int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
> >>> @@ -1775,6 +1784,143 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
> >>>  return ret;
> >>>  }
> >>>
> >>> +static void coroutine_fn blk_aio_zone_report_entry(void *opaque) {
> >>> +BlkAioEmAIOCB *acb = opaque;
> >>> +BlkRwCo *rwco = >rwco;
> >>> +
> >>> +rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset,
> >>> +   rwco->zone_report.nr_zones,
> >>> +   rwco->zone_report.zones);
> >>> +blk_aio_complete(acb);
> >>> +}
> >>> +
> >>> +BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
> >>> +unsigned int *nr_zones,
> >>> +BlockZoneDescriptor  *zones,
> >>> +BlockCompletionFunc *cb, void *opaque)
> >>> +{
> >>> +BlkAioEmAIOCB *acb;
> >>> +Coroutine *co;
> >>> +IO_CODE();
> >>> +
> >>> +blk_inc_in_flight(blk);
> >>> +acb = blk_aio_get(_aio_em_aiocb_info, blk, cb, opaque);
> >>> +acb->rwco = (BlkRwCo) {
> >>> +.blk= blk,
> >>> +.offset = offset,
> >>> +.ret= NOT_DONE,
> >>> +.zone_report = {
> >>> +.zones = zones,
> >>> +.nr_zones = nr_zones,
> >>> +},
> >>> +};
> >>> +acb->has_returned = false;
> >>> +
> >>> +co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
> >>> +bdrv_coroutine_enter(blk_bs(blk), co);
> >>> +
> >>> +acb->has_returned = true;
> >>> +if (acb->rwco.ret != NOT_DONE) {
> >>> +replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
> >>> + blk_aio_complete_bh, acb);
> >>> +}
> >>> +
> >>> +return >common;
> >>> +}
> >>> +
> >>> +static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque) {
> >>> +BlkAioEmAIOCB *acb = opaque;
> >>> +BlkRwCo *rwco = >rwco;
> >>> +
> >>> +rwco->ret = blk_co_zone_mgmt(rwco->blk, rwco->zone_mgmt.op,
> >>> + rwco->offset, acb->bytes);
> >>> +blk_aio_complete(acb);
> >>> +}
> >>> +
> >>> +BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
> >>> +  int64_t offset, int64_t len,
> >>> +  BlockCompletionFunc *cb, void *opaque) {
> >>> +BlkAioEmAIOCB *acb;
> >>> +Coroutine *co;
> >>> +IO_CODE();
> >>> +
> >>> +blk_inc_in_flight(blk);
> >>> +acb = blk_aio_get(_aio_em_aiocb_info, blk, cb, opaque);
> >>> +acb->rwco = (BlkRwCo) {
> >>> +.blk= blk,
> >>> +.offset = offset,
> >>> +.ret= NOT_DONE,
> >>> +.zone_mgmt = {
> >>> +.op = op,
> >>> +

Re: [PATCH v2] pci: Assert that capabilities never overlap

2022-10-12 Thread Akihiko Odaki

On Thu, Sep 29, 2022 at 7:55 PM Markus Armbruster  wrote:
>
> Akihiko Odaki  writes:
>
> > On Mon, Sep 5, 2022 at 6:26 PM Markus Armbruster  wrote:
> >>
> >> Akihiko Odaki  writes:
> >>
> >> > On Fri, Sep 2, 2022 at 7:23 PM Markus Armbruster  
> >> > wrote:
> >> >>
> >> >> Akihiko Odaki  writes:
> >> >>
> >> >> > pci_add_capability appears in most PCI devices. Its error handling 
> >> >> > required
> >> >> > lots of code, and led to inconsistent behaviors such as:
> >> >> > - passing error_abort
> >> >> > - passing error_fatal
> >> >> > - asserting the returned value
> >> >> > - propagating the error to the caller
> >> >> > - skipping the rest of the function
> >> >> > - just ignoring
> >> >> >
> >> >> > The code generating errors in pci_add_capability had a comment which
> >> >> > says:
> >> >> >> Verify that capabilities don't overlap.  Note: device assignment
> >> >> >> depends on this check to verify that the device is not broken.
> >> >> >> Should never trigger for emulated devices, but it's helpful for
> >> >> >> debugging these.
> >> >> >
> >> >> > Indeed vfio has some code that passes capability offsets and sizes 
> >> >> > from
> >> >> > a physical device, but it explicitly pays attention so that the
> >> >> > capabilities never overlap.
> >> >>
> >> >> I can't see that at a glance.  Can you give me a clue?
> >> >>
> >> >> > Therefore, we can always assert that
> >> >> > capabilities never overlap when pci_add_capability is called, 
> >> >> > resolving
> >> >> > these inconsistencies.
> >> >> >
> >> >> > Signed-off-by: Akihiko Odaki 
> >> >>
> >> >
> >> > Looking at vfio_add_std_cap(), and vfio_add_ext_cap() it seems that
> >> > they are clipping the size of capabilities so that they do not
> >> > overlap, if I read it correctly.
> >>
> >> If we want to deal gracefully with buggy physical devices, we need to
> >> treat pdev->config[] as untrusted input.
> >>
> >> As far as I can tell:
> >>
> >> * vfio_add_capabilities() replicates the physical device's capabilities
> >>   (starting at pdev->config[PCI_CAPABILITY_LIST]) in the virtual device.
> >>
> >> * vfio_add_std_cap() is a helper to add the tail starting at
> >>   pdev->config[pos].
> >>
> >> Could the physical device's capabilities overlap?  If yes, what would
> >> happen before and after your series?
> >>
> >
> > When the capabilities overlap, vfio_std_cap_max_size() and
> > vfio_ext_cap_max_size(), called by vfio_add_std_cap(),
> > vfio_add_ext_cap() should clip the size of capabilities. Comments in
> > vfio_add_std_cap() and vfio_add_ext_cap() say: "Since QEMU doesn't
> > actually handle many of the config accesses, exact size doesn't seem
> > worthwhile."
>
> Weird :)
>
> Back to your patch.  Its core is dumbing down of pci_add_capability() so
> it can't fail anymore.  Instead it aborts on overlapping capabilities.
> Note that it already aborts when running out of PCI config space.
>
> The remainder of the patch simplifies callers accordingly.  Some callers
> ignore the error before the patch, some abort on error, and some pass it
> on to their callers.  Too much for me to review in detail.  We can talk
> about ways to split it up if that's desired.

Yes, I'd like to hear your ideas on how it should be split up. The
main challenge here is that it changes the signature of a function
without changing its name.

>
> My one concern is device assignment, which should treat the device's
> capabilities as untrusted input.  If you're right in that such
> capabilities get truncated before they reach pci_add_capability(), then
> your change from failure to abort is okay.
>
> I'd like to leave checking your correctness argument to actual PCI
> experts.
>

Included PCI maintainers in To. Michael S. Tsirkin and Marcel
Apfelbaum, Can you confirm this?

Regards,
Akihiko Odaki

Re: [PATCH v11 3/7] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls

2022-10-12 Thread Sam Li

Damien Le Moal  于2022年10月13日周四 12:41写道：
>
> On 10/10/22 11:21, Sam Li wrote:
> > Add a new zoned_host_device BlockDriver. The zoned_host_device option
> > accepts only zoned host block devices. By adding zone management
> > operations in this new BlockDriver, users can use the new block
> > layer APIs including Report Zone and four zone management operations
> > (open, close, finish, reset, reset_all).
> >
> > Qemu-io uses the new APIs to perform zoned storage commands of the device:
> > zone_report(zrp), zone_open(zo), zone_close(zc), zone_reset(zrs),
> > zone_finish(zf).
> >
> > For example, to test zone_report, use following command:
> > $ ./build/qemu-io --image-opts -n driver=zoned_host_device, 
> > filename=/dev/nullb0
> > -c "zrp offset nr_zones"
> >
> > Signed-off-by: Sam Li 
> > Reviewed-by: Hannes Reinecke 
> > ---
> >  block/block-backend.c | 146 +
> >  block/file-posix.c| 329 ++
> >  block/io.c|  41 
> >  include/block/block-common.h  |   1 +
> >  include/block/block-io.h  |   7 +
> >  include/block/block_int-common.h  |  24 +++
> >  include/block/raw-aio.h   |   6 +-
> >  include/sysemu/block-backend-io.h |  17 ++
> >  meson.build   |   4 +
> >  qapi/block-core.json  |   8 +-
> >  qemu-io-cmds.c| 148 ++
> >  11 files changed, 728 insertions(+), 3 deletions(-)
> >
> > diff --git a/block/block-backend.c b/block/block-backend.c
> > index d4a5df2ac2..ddc569e3ac 100644
> > --- a/block/block-backend.c
> > +++ b/block/block-backend.c
> > @@ -1431,6 +1431,15 @@ typedef struct BlkRwCo {
> >  void *iobuf;
> >  int ret;
> >  BdrvRequestFlags flags;
> > +union {
> > +struct {
> > +unsigned int *nr_zones;
> > +BlockZoneDescriptor *zones;
> > +} zone_report;
> > +struct {
> > +BlockZoneOp op;
> > +} zone_mgmt;
> > +};
> >  } BlkRwCo;
> >
> >  int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
> > @@ -1775,6 +1784,143 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
> >  return ret;
> >  }
> >
> > +static void coroutine_fn blk_aio_zone_report_entry(void *opaque) {
> > +BlkAioEmAIOCB *acb = opaque;
> > +BlkRwCo *rwco = >rwco;
> > +
> > +rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset,
> > +   rwco->zone_report.nr_zones,
> > +   rwco->zone_report.zones);
> > +blk_aio_complete(acb);
> > +}
> > +
> > +BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
> > +unsigned int *nr_zones,
> > +BlockZoneDescriptor  *zones,
> > +BlockCompletionFunc *cb, void *opaque)
> > +{
> > +BlkAioEmAIOCB *acb;
> > +Coroutine *co;
> > +IO_CODE();
> > +
> > +blk_inc_in_flight(blk);
> > +acb = blk_aio_get(_aio_em_aiocb_info, blk, cb, opaque);
> > +acb->rwco = (BlkRwCo) {
> > +.blk= blk,
> > +.offset = offset,
> > +.ret= NOT_DONE,
> > +.zone_report = {
> > +.zones = zones,
> > +.nr_zones = nr_zones,
> > +},
> > +};
> > +acb->has_returned = false;
> > +
> > +co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
> > +bdrv_coroutine_enter(blk_bs(blk), co);
> > +
> > +acb->has_returned = true;
> > +if (acb->rwco.ret != NOT_DONE) {
> > +replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
> > + blk_aio_complete_bh, acb);
> > +}
> > +
> > +return >common;
> > +}
> > +
> > +static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque) {
> > +BlkAioEmAIOCB *acb = opaque;
> > +BlkRwCo *rwco = >rwco;
> > +
> > +rwco->ret = blk_co_zone_mgmt(rwco->blk, rwco->zone_mgmt.op,
> > + rwco->offset, acb->bytes);
> > +blk_aio_complete(acb);
> > +}
> > +
> > +BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
> > +  int64_t offset, int64_t len,
> > +  BlockCompletionFunc *cb, void *opaque) {
> > +BlkAioEmAIOCB *acb;
> > +Coroutine *co;
> > +IO_CODE();
> > +
> > +blk_inc_in_flight(blk);
> > +acb = blk_aio_get(_aio_em_aiocb_info, blk, cb, opaque);
> > +acb->rwco = (BlkRwCo) {
> > +.blk= blk,
> > +.offset = offset,
> > +.ret= NOT_DONE,
> > +.zone_mgmt = {
> > +.op = op,
> > +},
> > +};
> > +acb->bytes = len;
> > +acb->has_returned = false;
> > +
> > +co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
> > +bdrv_coroutine_enter(blk_bs(blk), co);
> > +
> > +acb->has_returned = true;
> > +if (acb->rwco.ret != NOT_DONE) {
> > +

Re: [PATCH v3 1/3] file-posix:add the tracking of the zones write pointers

2022-10-12 Thread Damien Le Moal

On 10/10/22 11:33, Sam Li wrote:
> Since Linux doesn't have a user API to issue zone append operations to
> zoned devices from user space, the file-posix driver is modified to add
> zone append emulation using regular writes. To do this, the file-posix
> driver tracks the wp location of all zones of the device. It uses an
> array of uint64_t. The most significant bit of each wp location indicates
> if the zone type is conventional zones.
> 
> The zones wp can be changed due to the following operations issued:
> - zone reset: change the wp to the start offset of that zone
> - zone finish: change to the end location of that zone
> - write to a zone
> - zone append
> 
> Signed-off-by: Sam Li 
> ---
>  block/file-posix.c   | 158 +++
>  include/block/block-common.h |  14 +++
>  include/block/block_int-common.h |   5 +
>  3 files changed, 177 insertions(+)
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index a9d347292e..17c0b58158 100755
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -206,6 +206,7 @@ typedef struct RawPosixAIOData {
>  struct {
>  struct iovec *iov;
>  int niov;
> +int64_t *append_sector;

This should be added as part of patch 2. You do not need this to track
the wp of zones in this patch.

>  } io;
>  struct {
>  uint64_t cmd;
> @@ -226,6 +227,7 @@ typedef struct RawPosixAIOData {
>  struct {
>  unsigned long zone_op;
>  const char *zone_op_name;
> +bool all;
>  } zone_mgmt;
>  };
>  } RawPosixAIOData;
> @@ -1331,6 +1333,67 @@ static int hdev_get_max_segments(int fd, struct stat 
> *st) {
>  #endif
>  }
>  
> +#if defined(CONFIG_BLKZONED)
> +static int get_zones_wp(int64_t offset, int fd, BlockZoneWps *wps,

Nit: It would seem more natural to have the fd argument first...

> +unsigned int nrz) {
> +struct blk_zone *blkz;
> +int64_t rep_size;
> +int64_t sector = offset >> BDRV_SECTOR_BITS;
> +int ret, n = 0, i = 0;
> +rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct 
> blk_zone);
> +g_autofree struct blk_zone_report *rep = NULL;
> +
> +rep = g_malloc(rep_size);
> +blkz = (struct blk_zone *)(rep + 1);
> +while (n < nrz) {
> +memset(rep, 0, rep_size);
> +rep->sector = sector;
> +rep->nr_zones = nrz - n;
> +
> +do {
> +ret = ioctl(fd, BLKREPORTZONE, rep);
> +} while (ret != 0 && errno == EINTR);
> +if (ret != 0) {
> +error_report("%d: ioctl BLKREPORTZONE at %" PRId64 " failed %d",
> +fd, offset, errno);
> +return -errno;
> +}
> +
> +if (!rep->nr_zones) {
> +break;
> +}
> +
> +for (i = 0; i < rep->nr_zones; i++, n++) {
> +/*
> + * The wp tracking cares only about sequential writes required 
> and
> + * sequential write preferred zones so that the wp can advance to
> + * the right location.
> + * Use the most significant bit of the wp location to indicate 
> the
> + * zone type: 0 for SWR/SWP zones and 1 for conventional zones.
> + */
> +if (!(blkz[i].type != BLK_ZONE_TYPE_CONVENTIONAL)) {

Double negation... This can simply be:

if (blkz[i].type == BLK_ZONE_TYPE_CONVENTIONAL) {

> +wps->wp[i] += 1ULL << 63;

No need for the += here. This can be "=".

> +} else {
> +wps->wp[i] = blkz[i].wp << BDRV_SECTOR_BITS;
> +}
> +}
> +sector = blkz[i-1].start + blkz[i-1].len;

spaces missing around the "-" in the "i-1" expressions.

> +}
> +
> +return 0;
> +}
> +
> +static void update_zones_wp(int64_t offset, int fd, BlockZoneWps *wps,

Same nit as above: fd being the first argument would be a little more
natural in my opinion.

> +unsigned int nrz) {
> +qemu_mutex_lock(&wps->lock);
> +if (get_zones_wp(offset, fd, wps, nrz) < 0) {
> +error_report("report zone wp failed");
> +return;

You are leaking the lock here. Remove the return. Also, given that
get_zones_wp() already prints a message if report fails, I do not think
the message here is useful.

Also, why is this function void typed ? How can the caller know if the
update succeeded or not ?

> +}
> +qemu_mutex_unlock(&wps->lock);
> +}
> +#endif
> +
>  static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
>  {
>  BDRVRawState *s = bs->opaque;
> @@ -1414,6 +1477,19 @@ static void raw_refresh_limits(BlockDriverState *bs, 
> Error **errp)
>  error_report("Invalid device capacity %" PRId64 " bytes ", 
> bs->bl.capacity);
>  return;
>  }
> +
> +ret = get_sysfs_long_val(, "physical_block_size");
> +if (ret >= 0) {
> +bs->bl.write_granularity = ret;
> +

Re: FAILED: libqemu-arm-bsd-user.fa.p/bsd-user_signal.c.o

2022-10-12 Thread Warner Losh

On Mon, Oct 10, 2022 at 11:01 AM Warner Losh  wrote:

>
>
> On Mon, Oct 10, 2022 at 1:13 AM Thomas Huth  wrote:
>
>> On 10/10/2022 08.18, Dennis Clarke wrote:
>> >
>> > On FreeBSD 14.0 CURRENT amd64 everything seems to go swimmingly until :
>> >
>> > [5679/6848] Compiling C object
>> libqemu-arm-bsd-user.fa.p/bsd-user_mmap.c.o
>> > [5680/6848] Compiling C object
>> libqemu-arm-bsd-user.fa.p/bsd-user_signal.c.o
>> > FAILED: libqemu-arm-bsd-user.fa.p/bsd-user_signal.c.o
>> > /usr/bin/cc -m64 -mcx16 -Ilibqemu-arm-bsd-user.fa.p -I. -I..
>> -Itarget/arm
>> > -I../target/arm -I../common-user/host/x86_64 -I../bsd-user/include
>> > -Ibsd-user/freebsd -I../bsd-user/freebsd -I../bsd-user/host/x86_64
>> > -Ibsd-user -I../bsd-user -I../bsd-user/arm -Iqapi -Itrace -Iui
>> -Iui/shader
>> > -I/usr/local/include -I/usr/local/include/glib-2.0
>> > -I/usr/local/lib/glib-2.0/include -fcolor-diagnostics -Wall
>> -Winvalid-pch
>> > -std=gnu11 -O0 -g -iquote . -iquote /opt/bw/build/qemu -iquote
>> > /opt/bw/build/qemu/include -iquote /opt/bw/build/qemu/tcg/i386 -pthread
>> > -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
>> -Wstrict-prototypes
>> > -Wredundant-decls -Wundef -Wwrite-strings -Wmissing-prototypes
>> > -fno-strict-aliasing -fno-common -fwrapv -Wold-style-definition
>> > -Wtype-limits -Wformat-security -Wformat-y2k -Winit-self
>> > -Wignored-qualifiers -Wempty-body -Wnested-externs -Wendif-labels
>> > -Wexpansion-to-defined -Wno-initializer-overrides
>> -Wno-missing-include-dirs
>> > -Wno-shift-negative-value -Wno-string-plus-int
>> -Wno-typedef-redefinition
>> > -Wno-tautological-type-limit-compare -Wno-psabi
>> > -Wno-gnu-variable-sized-type-not-at-end -fstack-protector-strong -m64
>> -g -O0
>> > -fno-builtin -fPIE -DNEED_CPU_H
>> > '-DCONFIG_TARGET="arm-bsd-user-config-target.h"'
>> > '-DCONFIG_DEVICES="arm-bsd-user-config-devices.h"' -MD -MQ
>> > libqemu-arm-bsd-user.fa.p/bsd-user_signal.c.o -MF
>> > libqemu-arm-bsd-user.fa.p/bsd-user_signal.c.o.d -o
>> > libqemu-arm-bsd-user.fa.p/bsd-user_signal.c.o -c ../bsd-user/signal.c
>> > In file included from ../bsd-user/signal.c:27:
>> > In file included from ../bsd-user/host/x86_64/host-signal.h:14:
>> > In file included from /usr/include/vm/pmap.h:92:
>> > /usr/include/machine/pmap.h:452:2: error: fields must have a constant
>> size:
>> > 'variable length array in structure' extension will never be supported
>> >  PV_CHUNK_HEADER
>> >  ^
>> > /usr/include/machine/pmap.h:448:12: note: expanded from macro
>> 'PV_CHUNK_HEADER'
>> >  uint64_tpc_map[_NPCM];  /* bitmap; 1 = free
>> */  \
>> >  ^
>> > /usr/include/machine/pmap.h:456:2: error: fields must have a constant
>> size:
>> > 'variable length array in structure' extension will never be supported
>> >  PV_CHUNK_HEADER
>> >  ^
>> > /usr/include/machine/pmap.h:448:12: note: expanded from macro
>> 'PV_CHUNK_HEADER'
>> >  uint64_tpc_map[_NPCM];  /* bitmap; 1 = free
>> */  \
>> >  ^
>> > 2 errors generated.
>> > ninja: build stopped: subcommand failed.
>> > gmake[1]: *** [Makefile:165: run-ninja] Error 1
>> > gmake[1]: Leaving directory '/opt/bw/build/qemu/build'
>> > gmake: *** [GNUmakefile:11: all] Error 2
>> >
>> > phobos#
>> >
>> > Is there a trivial patch ?  Or perhaps try again using GCC and not
>> LLVM/Clang?
>>
>> I'm not using FreeBSD, so no real clue, but this pretty much sounds like
>> _NPCM is not properly defined by your system headers anymore, so I assume
>> this is a problem on the FreeBSD side ... I'd suggest to report it on the
>> FreeBSD mailing list.
>>
>
> Actually, it is properly defined. The real problem is that it depends on
> howmany, which is defined
> in sys/param.h, which isn't included in sys/_pv_entry.h, leading to the
> problem. This makes it look
> like a variable length array which compilers hate in this context.
>
> diff --git a/bsd-user/host/x86_64/host-signal.h
> b/bsd-user/host/x86_64/host-signal.h
> index 47ca19f8814..32ac4e41803 100644
> --- a/bsd-user/host/x86_64/host-signal.h
> +++ b/bsd-user/host/x86_64/host-signal.h
> @@ -9,6 +9,7 @@
>  #ifndef X86_64_HOST_SIGNAL_H
>  #define X86_64_HOST_SIGNAL_H
>
> +#include <sys/param.h>
>  #include 
>  #include 
>  #include 
>
> fixes it. It's unclear to me if this should be added to sys/_pv_entry.h
> (this was just committed to
> FreeBSD in the last week), or if I need to upstream this patch.
>

I posted a patch here (i386 needed it too). I also fixed it in FreeBSD, but
the breakage was
around for long enough we kinda need both. I'll submit a pull request if
somebody else
doesn't beat me to it. :).

Warner

Re: [PATCH 3/4] qtest: Improve error messages when property can not be set right now

2022-10-12 Thread Markus Armbruster

Thomas Huth  writes:

> On 12/10/2022 17.38, Markus Armbruster wrote:
>> When you try to set qtest property "log" while the qtest object is
>> active, the error message blames "insufficient permission":
>> 
>>  $ qemu-system-x86_64 -S -display none -nodefaults -monitor stdio 
>> -chardev socket,id=chrqt0,path=qtest.socket,server=on,wait=off -object 
>> qtest,id=qt0,chardev=chrqt0,log=/dev/null
>>  QEMU 7.1.50 monitor - type 'help' for more information
>>  (qemu) qom-set /objects/qt0 log qtest.log
>>  Error: Insufficient permission to perform this operation
>> 
>> This implies it could work with "sufficient permission".  It can't.
>> Change the error message to:
>> 
>>  Error: Property 'log' can not be set now
>
> Can it be set later? ... if not, that error message is almost as confusing 
> as the original one. Maybe it's better to tell the users *when* they can set 
> the property?

The property cannot be set while the object is "active", i.e. global
@qtest points to it.

Right now, @qtest points to the object from completion with
user_creatable_complete() to unparent.

Completion fails when @qtest already points to another object, i.e. only
one object can be complete at any time.

Since Paolo took the trouble to code an unparent method, I assume
unparent can happen.  I can't tell offhand when.

Help!

Re: [PATCH v2] vhost-vdpa: allow passing opened vhostfd to vhost-vdpa

2022-10-12 Thread Jason Wang




在 2022/10/12 13:59, Si-Wei Liu 写道:



On 10/11/2022 8:09 PM, Jason Wang wrote:

On Tue, Oct 11, 2022 at 1:18 AM Si-Wei Liu  wrote:

On 10/8/2022 10:43 PM, Jason Wang wrote:

On Sat, Oct 8, 2022 at 5:04 PM Si-Wei Liu  wrote:

Similar to other vhost backends, vhostfd can be passed to vhost-vdpa
backend as another parameter to instantiate vhost-vdpa net client.
This would benefit the use case where only open file descriptors, as
opposed to raw vhost-vdpa device paths, are accessible from the QEMU
process.

(qemu) netdev_add type=vhost-vdpa,vhostfd=61,id=vhost-vdpa1

Adding Cindy.

This has been discussed before, we've already had
vhostdev=/dev/fdset/$fd which should be functional equivalent to what
has been proposed here. (And this is how libvirt works if I understand
correctly).

Yes, I was aware of that discussion. However, our implementation of the 
management software is a bit different from libvirt, in which the paths in 
/dev/fdset/NNN can't be dynamically passed to the container where QEMU is 
running. By using a specific vhostfd property with existing code, it would 
allow our mgmt software smooth adaption without having to add too much infra 
code to support the /dev/fdset/NNN trick.

I think fdset has extra flexibility in e.g hot-plug to allow the file
descriptor to be passed with SCM_RIGHTS.
Yes, that's exactly the use case we'd like to support. Though the 
difference in our mgmt software stack from libvirt is that any dynamic 
path in /dev (like /dev/fdset/ABC or /dev/vhost-vdpa-XYZ) can't be 
allowed to get passed through to the container running QEMU on the fly 
for security reasons. fd passing is allowed, though, with very strict 
security checks.



Interesting, any reason for disallowing fd passing? I'm asking since 
it's the way that libvirt works and it seems to me we haven't gotten any 
complaints in the past.



That's the main motivation for this direct vhostfd passing support 
(noted fdset doesn't need to be used along with /dev/fdset node).


Having said that, I found there's also a nuance in the 
vhostdev=/dev/fdset/XyZ interface besides the /dev node limitation: 
the fd to open has to be dup'ed from the original one passed via 
SCM_RIGHTS. This also has a security implication: any ioctl call 
from QEMU can't be audited through the original fd.



I'm not sure I get this, but the management layer can enforce an ioctl 
whitelist for safety.


Thanks


In this regard, I think vhostfd offers more flexibility than working 
around those qemu_open() specifics. Would this justify the use case 
in question?


Thanks,
-Siwei


  It would still be good to add
the support.


On the other hand, the other vhost backends, e.g. tap (via vhost-net), 
vhost-scsi and vhost-vsock all accept vhostfd as parameter to instantiate 
device, although the /dev/fdset trick also works there. I think vhost-vdpa is 
not  unprecedented in this case?

Yes.

Thanks


Thanks,
-Siwei



Thanks

Signed-off-by: Si-Wei Liu
Acked-by: Eugenio Pérez

---
v2:
   - fixed typo in commit message
   - s/fd's/file descriptors/
---
  net/vhost-vdpa.c | 25 -
  qapi/net.json|  3 +++
  qemu-options.hx  |  6 --
  3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 182b3a1..366b070 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -683,14 +683,29 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,

  assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
  opts = >u.vhost_vdpa;
-if (!opts->vhostdev) {
-error_setg(errp, "vdpa character device not specified with vhostdev");
+if (!opts->has_vhostdev && !opts->has_vhostfd) {
+error_setg(errp,
+   "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
  return -1;
  }

-vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
-if (vdpa_device_fd == -1) {
-return -errno;
+if (opts->has_vhostdev && opts->has_vhostfd) {
+error_setg(errp,
+   "vhost-vdpa: vhostdev= and vhostfd= are mutually 
exclusive");
+return -1;
+}
+
+if (opts->has_vhostdev) {
+vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
+if (vdpa_device_fd == -1) {
+return -errno;
+}
+} else if (opts->has_vhostfd) {
+vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
+if (vdpa_device_fd == -1) {
+error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
+return -1;
+}
  }

  r = vhost_vdpa_get_features(vdpa_device_fd, , errp);
diff --git a/qapi/net.json b/qapi/net.json
index dd088c0..926ecc8 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -442,6 +442,8 @@
  # @vhostdev: path of vhost-vdpa device
  #(default:'/dev/vhost-vdpa-0')
  #
+# @vhostfd: file descriptor of an already opened vhost vdpa device
+#
  # @queues: number of queues to be created for multiqueue vhost-vdpa

Re: [PATCH v11 7/7] docs/zoned-storage: add zoned device documentation

2022-10-12 Thread Damien Le Moal

On 10/10/22 11:21, Sam Li wrote:
> Add the documentation about the zoned device support to virtio-blk
> emulation.
> 
> Signed-off-by: Sam Li 
> Reviewed-by: Stefan Hajnoczi 
> ---
>  docs/devel/zoned-storage.rst   | 40 ++
>  docs/system/qemu-block-drivers.rst.inc |  6 
>  2 files changed, 46 insertions(+)
>  create mode 100644 docs/devel/zoned-storage.rst
> 
> diff --git a/docs/devel/zoned-storage.rst b/docs/devel/zoned-storage.rst
> new file mode 100644
> index 00..deaa4ce99b
> --- /dev/null
> +++ b/docs/devel/zoned-storage.rst
> @@ -0,0 +1,40 @@
> +=
> +zoned-storage
> +=
> +
> +Zoned Block Devices (ZBDs) divide the LBA space into block regions called 
> zones
> +that are larger than the LBA size. They can only allow sequential writes, 
> which
> +can reduce write amplification in SSDs, and potentially lead to higher
> +throughput and increased capacity. More details about ZBDs can be found at:
> +
> +https://zonedstorage.io/docs/introduction/zoned-storage
> +
> +1. Block layer APIs for zoned storage
> +-
> +QEMU block layer has three zoned storage models:
> +- BLK_Z_HM: This model only allows sequential writes access. It supports a 
> set

- BLK_Z_HM: The host-managed zoned model...

> +of ZBD-specific I/O request that used by the host to manage device zones.

...of ZBD-specific commands that can be used by a host to manage the
zones of a device.

> +- BLK_Z_HA: It deals with both sequential writes and random writes access.

- BLK_Z_HA: The host-aware zoned model allows random write operations in
zones, making it backward compatible with regular block devices.

> +- BLK_Z_NONE: Regular block devices and drive-managed ZBDs are treated as
> +non-zoned devices.
> +
> +The block device information resides inside BlockDriverState. QEMU uses
> +BlockLimits struct(BlockDriverState::bl) that is continuously accessed by the
> +block layer while processing I/O requests. A BlockBackend has a root pointer 
> to
> +a BlockDriverState graph(for example, raw format on top of file-posix). The
> +zoned storage information can be propagated from the leaf BlockDriverState 
> all
> +the way up to the BlockBackend. If the zoned storage model in file-posix is
> +set to BLK_Z_HM, then block drivers will declare support for zoned host 
> device.
> +
> +The block layer APIs support commands needed for zoned storage devices,
> +including report zones, four zone operations, and zone append.
> +
> +2. Emulating zoned storage controllers
> +--
> +When the BlockBackend's BlockLimits model reports a zoned storage device, 
> users
> +like the virtio-blk emulation or the qemu-io-cmds.c utility can use block 
> layer
> +APIs for zoned storage emulation or testing.
> +
> +For example, to test zone_report on a null_blk device using qemu-io is:
> +$ path/to/qemu-io --image-opts -n 
> driver=zoned_host_device,filename=/dev/nullb0
> +-c "zrp offset nr_zones"
> diff --git a/docs/system/qemu-block-drivers.rst.inc 
> b/docs/system/qemu-block-drivers.rst.inc
> index dfe5d2293d..0b97227fd9 100644
> --- a/docs/system/qemu-block-drivers.rst.inc
> +++ b/docs/system/qemu-block-drivers.rst.inc
> @@ -430,6 +430,12 @@ Hard disks
>you may corrupt your host data (use the ``-snapshot`` command
>line option or modify the device permissions accordingly).
>  
> +Zoned block devices
> +  Zoned block devices can be passed through to the guest if the emulated 
> storage
> +  controller supports zoned storage. Use ``--blockdev zoned_host_device,
> +  node-name=drive0,filename=/dev/nullb0`` to pass through ``/dev/nullb0``
> +  as ``drive0``.
> +
>  Windows
>  ^^^
>  

With the above nits fixed, feel free to add:

Reviewed-by: Damien Le Moal 

-- 
Damien Le Moal
Western Digital Research

Re: [PATCH v11 3/7] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls

2022-10-12 Thread Damien Le Moal

On 10/10/22 11:21, Sam Li wrote:
> Add a new zoned_host_device BlockDriver. The zoned_host_device option
> accepts only zoned host block devices. By adding zone management
> operations in this new BlockDriver, users can use the new block
> layer APIs including Report Zone and four zone management operations
> (open, close, finish, reset, reset_all).
> 
> Qemu-io uses the new APIs to perform zoned storage commands of the device:
> zone_report(zrp), zone_open(zo), zone_close(zc), zone_reset(zrs),
> zone_finish(zf).
> 
> For example, to test zone_report, use following command:
> $ ./build/qemu-io --image-opts -n driver=zoned_host_device, 
> filename=/dev/nullb0
> -c "zrp offset nr_zones"
> 
> Signed-off-by: Sam Li 
> Reviewed-by: Hannes Reinecke 
> ---
>  block/block-backend.c | 146 +
>  block/file-posix.c| 329 ++
>  block/io.c|  41 
>  include/block/block-common.h  |   1 +
>  include/block/block-io.h  |   7 +
>  include/block/block_int-common.h  |  24 +++
>  include/block/raw-aio.h   |   6 +-
>  include/sysemu/block-backend-io.h |  17 ++
>  meson.build   |   4 +
>  qapi/block-core.json  |   8 +-
>  qemu-io-cmds.c| 148 ++
>  11 files changed, 728 insertions(+), 3 deletions(-)
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index d4a5df2ac2..ddc569e3ac 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -1431,6 +1431,15 @@ typedef struct BlkRwCo {
>  void *iobuf;
>  int ret;
>  BdrvRequestFlags flags;
> +union {
> +struct {
> +unsigned int *nr_zones;
> +BlockZoneDescriptor *zones;
> +} zone_report;
> +struct {
> +BlockZoneOp op;
> +} zone_mgmt;
> +};
>  } BlkRwCo;
>  
>  int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
> @@ -1775,6 +1784,143 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
>  return ret;
>  }
>  
> +static void coroutine_fn blk_aio_zone_report_entry(void *opaque) {
> +BlkAioEmAIOCB *acb = opaque;
> +BlkRwCo *rwco = >rwco;
> +
> +rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset,
> +   rwco->zone_report.nr_zones,
> +   rwco->zone_report.zones);
> +blk_aio_complete(acb);
> +}
> +
> +BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
> +unsigned int *nr_zones,
> +BlockZoneDescriptor  *zones,
> +BlockCompletionFunc *cb, void *opaque)
> +{
> +BlkAioEmAIOCB *acb;
> +Coroutine *co;
> +IO_CODE();
> +
> +blk_inc_in_flight(blk);
> +acb = blk_aio_get(_aio_em_aiocb_info, blk, cb, opaque);
> +acb->rwco = (BlkRwCo) {
> +.blk= blk,
> +.offset = offset,
> +.ret= NOT_DONE,
> +.zone_report = {
> +.zones = zones,
> +.nr_zones = nr_zones,
> +},
> +};
> +acb->has_returned = false;
> +
> +co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
> +bdrv_coroutine_enter(blk_bs(blk), co);
> +
> +acb->has_returned = true;
> +if (acb->rwco.ret != NOT_DONE) {
> +replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
> + blk_aio_complete_bh, acb);
> +}
> +
> +return >common;
> +}
> +
> +static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque) {
> +BlkAioEmAIOCB *acb = opaque;
> +BlkRwCo *rwco = >rwco;
> +
> +rwco->ret = blk_co_zone_mgmt(rwco->blk, rwco->zone_mgmt.op,
> + rwco->offset, acb->bytes);
> +blk_aio_complete(acb);
> +}
> +
> +BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
> +  int64_t offset, int64_t len,
> +  BlockCompletionFunc *cb, void *opaque) {
> +BlkAioEmAIOCB *acb;
> +Coroutine *co;
> +IO_CODE();
> +
> +blk_inc_in_flight(blk);
> +acb = blk_aio_get(_aio_em_aiocb_info, blk, cb, opaque);
> +acb->rwco = (BlkRwCo) {
> +.blk= blk,
> +.offset = offset,
> +.ret= NOT_DONE,
> +.zone_mgmt = {
> +.op = op,
> +},
> +};
> +acb->bytes = len;
> +acb->has_returned = false;
> +
> +co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
> +bdrv_coroutine_enter(blk_bs(blk), co);
> +
> +acb->has_returned = true;
> +if (acb->rwco.ret != NOT_DONE) {
> +replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
> + blk_aio_complete_bh, acb);
> +}
> +
> +return >common;
> +}
> +
> +/*
> + * Send a zone_report command.
> + * offset is a byte offset from the start of the device. No alignment
> + * required for offset.
> + * nr_zones represents IN

[PATCH v2] tcg/loongarch64: Add direct jump support

2022-10-12 Thread Qi Hu

Similar to the ARM64, LoongArch has PC-relative instructions such as
PCADDU18I. These instructions can be used to support direct jump for
LoongArch. Additionally, if instruction "B offset" can cover the target
address(target is within ±128MB range), a single "B offset" plus a nop
will be used by "tb_target_set_jump_target".

Signed-off-by: Qi Hu 
---
 tcg/loongarch64/tcg-target.c.inc | 53 +---
 tcg/loongarch64/tcg-target.h |  3 +-
 2 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index f5a214a17f..9f9508836a 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1031,6 +1031,36 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
*args)
 #endif
 }
 
+/* LoongArch use `andi zero, zero, 0` as NOP.  */
+#define NOP OPC_ANDI
+static void tcg_out_nop(TCGContext *s)
+{
+   tcg_out32(s, NOP);
+}
+
+void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+  uintptr_t jmp_rw, uintptr_t addr)
+{
+tcg_insn_unit i1, i2;
+ptrdiff_t upper, lower;
+ptrdiff_t offset = (addr - jmp_rx) >> 2;
+
+if (offset == sextreg(offset, 0, 28)) {
+i1 = encode_sd10k16_insn(OPC_B, offset);
+i2 = NOP;
+} else {
+upper = ((offset + (1 << 15)) >> 16) & 0xf;
+lower = (offset & 0x);
+/* patch pcaddu18i */
+i1 = encode_dsj20_insn(OPC_PCADDU18I, TCG_REG_T0, upper);
+/* patch jirl */
+i2 = encode_djsk16_insn(OPC_JIRL, TCG_REG_ZERO, TCG_REG_T0, lower);
+}
+uint64_t pair = ((uint64_t)i2 << 32) | i1;
+qatomic_set((uint64_t *)jmp_rw, pair);
+flush_idcache_range(jmp_rx, jmp_rw, 8);
+}
+
 /*
  * Entry-points
  */
@@ -1058,11 +1088,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 break;
 
 case INDEX_op_goto_tb:
-assert(s->tb_jmp_insn_offset == 0);
-/* indirect jump method */
-tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
-   (uintptr_t)(s->tb_jmp_target_addr + a0));
-tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+if (s->tb_jmp_insn_offset != NULL) {
+/* TCG_TARGET_HAS_direct_jump */
+/* Ensure that "patch area" are 8-byte aligned so that an
+   atomic write can be used to patch the target address. */
+if ((uintptr_t)s->code_ptr & 7) {
+tcg_out_nop(s);
+}
+s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+/* actual branch destination will be patched by
+   tb_target_set_jmp_target later */
+tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
+tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+} else {
+/* !TCG_TARGET_HAS_direct_jump */
+tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
+(uintptr_t)(s->tb_jmp_target_addr + a0));
+tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+}
 set_jmp_reset_offset(s, a0);
 break;
 
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index 67380b2432..c008d5686d 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -123,7 +123,7 @@ typedef enum {
 #define TCG_TARGET_HAS_clz_i32  1
 #define TCG_TARGET_HAS_ctz_i32  1
 #define TCG_TARGET_HAS_ctpop_i320
-#define TCG_TARGET_HAS_direct_jump  0
+#define TCG_TARGET_HAS_direct_jump  1
 #define TCG_TARGET_HAS_brcond2  0
 #define TCG_TARGET_HAS_setcond2 0
 #define TCG_TARGET_HAS_qemu_st8_i32 0
@@ -166,7 +166,6 @@ typedef enum {
 #define TCG_TARGET_HAS_muluh_i641
 #define TCG_TARGET_HAS_mulsh_i641
 
-/* not defined -- call should be eliminated at compile time */
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
 
 #define TCG_TARGET_DEFAULT_MO (0)
-- 
2.37.3

Re: ublk-qcow2: ublk-qcow2 is available

2022-10-12 Thread Ming Lei

On Wed, Oct 12, 2022 at 10:15:28AM -0400, Stefan Hajnoczi wrote:
> On Thu, 6 Oct 2022 at 06:14, Richard W.M. Jones  wrote:
> >
> > On Tue, Oct 04, 2022 at 09:53:32AM -0400, Stefan Hajnoczi wrote:
> > > qemu-nbd doesn't use io_uring to handle the backend IO,
> >
> > Would this be fixed by your (not yet upstream) libblkio driver for
> > qemu?
> 
> I was wrong, qemu-nbd has syntax to use io_uring:
> 
>   $ qemu-nbd ... --image-opts driver=file,filename=test.img,aio=io_uring

Yeah, I saw the option, previously when I tried io_uring via:

qemu-nbd -c /dev/nbd11 -n --aio=io_uring $my_file

It complained that 'qemu-nbd: Invalid aio mode 'io_uring'' even though
'qemu-nbd --help' does say that io_uring is supported.

Today I just tried it on Fedora 37, and it looks like it now works with
--aio=io_uring, but the IOPS is basically the same as with --aio=native, and
the IO trace shows that io_uring is used by qemu-nbd.


Thanks,
Ming

Re: [PATCH 2/2] tcg/loongarch64: Add direct jump support

2022-10-12 Thread Qi Hu




On 2022/10/12 19:34, WANG Xuerui wrote:

Hi,

Thanks for the improvement! Some room for improvement though...

On 2022/10/12 17:13, Qi Hu wrote:

Similar to the ARM64, LoongArch has PC-relative instructions such as
PCADDU18I. These instructions can be used to support direct jump for
LoongArch. Additionally, if instruction "B offset" can cover the target
address, "tb_target_set_jmp_target" will only patch the "B offset".


"if the target is within +/- 28 bits range, a single "B offset" plus a 
nop will be used instead" might sound better?

Yeah, I will add this to the commit message. :)




Signed-off-by: Qi Hu 
---
  tcg/loongarch64/tcg-insn-defs.c.inc |  3 ++
  tcg/loongarch64/tcg-target.c.inc    | 49 ++---
  tcg/loongarch64/tcg-target.h    |  2 +-
  3 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/tcg/loongarch64/tcg-insn-defs.c.inc 
b/tcg/loongarch64/tcg-insn-defs.c.inc

index d162571856..f5869c6bb1 100644
--- a/tcg/loongarch64/tcg-insn-defs.c.inc
+++ b/tcg/loongarch64/tcg-insn-defs.c.inc
@@ -112,6 +112,9 @@ typedef enum {
  OPC_BLE = 0x6400,
  OPC_BGTU = 0x6800,
  OPC_BLEU = 0x6c00,
+    /* pseudo-instruction */
+    NOP = 0x0340,
+


You certainly saw the big fat comment block saying the file was 
auto-generated and thus shouldn't be touched, didn't you? ;-)


I saw your need for a NOP constant later though, and you can instead 
add `#define OPC_NOP OPC_ANDI` in tcg-target.c.inc, just below the 
include of tcg-insn-defs.c.inc.

Oh, I think I can add  "tcg_out_nop" instead of "NOP" here.



  } LoongArchInsn;
    static int32_t __attribute__((unused))
diff --git a/tcg/loongarch64/tcg-target.c.inc 
b/tcg/loongarch64/tcg-target.c.inc

index f5a214a17f..3a7b1df081 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1058,11 +1058,24 @@ static void tcg_out_op(TCGContext *s, 
TCGOpcode opc,

  break;
    case INDEX_op_goto_tb:
-    assert(s->tb_jmp_insn_offset == 0);
-    /* indirect jump method */
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
-   (uintptr_t)(s->tb_jmp_target_addr + a0));
-    tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+    if (s->tb_jmp_insn_offset != NULL) {
+    /* TCG_TARGET_HAS_direct_jump */
+    /* Ensure that PCADD+JIRL are 8-byte aligned so that an 
atomic

+   write can be used to patch the target address. */
There isn't a "PCADD" in LoongArch, and it's possible for the 2 insns 
to be "B + NOP" as well. So better reword a bit like "Ensure that the 
8-byte direct jump fragment is aligned ..." (and add another space 
after the period at the end of the sentence).

~

+    if ((uintptr_t)s->code_ptr & 7) {
+    tcg_out32(s, NOP);
+    }
+    s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+    /* actual branch destination will be patched by
+   tb_target_set_jmp_target later. */
Either make it a proper sentence by title-casing "actual" and adding 
another space after the trailing period, or remove the period.

I will modify these comments.

+    tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
+    tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+    } else {
+    /* !TCG_TARGET_HAS_direct_jump */
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
+    (uintptr_t)(s->tb_jmp_target_addr + a0));
+    tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+    }
We unconditionally support the direct jump method after the change, so 
do we need to retain this block any more? Note the aarch64 port 
currently does the same (declaring unconditional support for direct 
jumps but keeping both code paths), if we want to remove this code 
path then you may want to remove the aarch64 one respectively too.
Yes, maybe we can remove these in our patch and submit another patch to 
modify aarch64 port?

  set_jmp_reset_offset(s, a0);
  break;
  @@ -1708,6 +1721,32 @@ static void tcg_target_init(TCGContext *s)
  tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED);
  }
  +void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+  uintptr_t jmp_rw, uintptr_t addr)
+{


Move the function to somewhere above, like above the "/* Entrypoints 
*/" section (and maybe introduce another section)? The various parts 
are more-or-less arranged in a particular order, so it's like going 
back to implementing concrete things after finishing everything and 
calling it a day.

~


Also you forgot to remove the now inappropriate comment about this 
helper in tcg-target.h.

Oh, I will remove the comment.



+    tcg_insn_unit i1, i2;
+
+    ptrdiff_t offset = addr - jmp_rx;
+
+    if (offset == sextreg(offset, 0, 28)) {
+    i1 = OPC_B | ((offset >> 18) & 0x3ff) | ((offset << 8) & 
0x3fffc00);
Use encode_sd10k16_insn instead.

Re: [PATCH 1/2] tcg/loongarch64: Implement INDEX_op_neg_i{32,64}

2022-10-12 Thread Qi Hu




On 2022/10/12 17:41, WANG Xuerui wrote:

Hi,

On 2022/10/12 17:13, Qi Hu wrote:

Signed-off-by: Qi Hu 
---
  tcg/loongarch64/tcg-target.c.inc | 9 +
  tcg/loongarch64/tcg-target.h | 4 ++--
  2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc 
b/tcg/loongarch64/tcg-target.c.inc

index a3debf6da7..f5a214a17f 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1125,6 +1125,13 @@ static void tcg_out_op(TCGContext *s, 
TCGOpcode opc,

  tcg_out_opc_nor(s, a0, a1, TCG_REG_ZERO);
  break;
  +    case INDEX_op_neg_i32:
+    tcg_out_opc_sub_w(s, a0, TCG_REG_ZERO, a1);
+    break;
+    case INDEX_op_neg_i64:
+    tcg_out_opc_sub_d(s, a0, TCG_REG_ZERO, a1);
+    break;
+
  case INDEX_op_nor_i32:
  case INDEX_op_nor_i64:
  if (c2) {
@@ -1503,6 +1510,8 @@ static TCGConstraintSetIndex 
tcg_target_op_def(TCGOpcode op)

  case INDEX_op_ext_i32_i64:
  case INDEX_op_not_i32:
  case INDEX_op_not_i64:
+    case INDEX_op_neg_i32:
+    case INDEX_op_neg_i64:
  case INDEX_op_extract_i32:
  case INDEX_op_extract_i64:
  case INDEX_op_bswap16_i32:
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index d58a6162f2..67380b2432 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -114,7 +114,7 @@ typedef enum {
  #define TCG_TARGET_HAS_bswap16_i32  1
  #define TCG_TARGET_HAS_bswap32_i32  1
  #define TCG_TARGET_HAS_not_i32  1
-#define TCG_TARGET_HAS_neg_i32  0
+#define TCG_TARGET_HAS_neg_i32  1
  #define TCG_TARGET_HAS_andc_i32 1
  #define TCG_TARGET_HAS_orc_i32  1
  #define TCG_TARGET_HAS_eqv_i32  0
@@ -150,7 +150,7 @@ typedef enum {
  #define TCG_TARGET_HAS_bswap32_i64  1
  #define TCG_TARGET_HAS_bswap64_i64  1
  #define TCG_TARGET_HAS_not_i64  1
-#define TCG_TARGET_HAS_neg_i64  0
+#define TCG_TARGET_HAS_neg_i64  1
  #define TCG_TARGET_HAS_andc_i64 1
  #define TCG_TARGET_HAS_orc_i64  1
  #define TCG_TARGET_HAS_eqv_i64  0
The whole change is not necessary, if target doesn't have neg then the 
target-independent logic already makes sure a sub with the same 
semantics is emitted. This is explained in the commit message of that 
commit introducing add/sub support.


That's good news. I think this patch can be ignored entirely.

Thanks.

Qi

CXL emulation in QEMU contribution

2022-10-12 Thread Viacheslav A.Dubeyko

Hi Jonathan,

As we agreed, I am moving our discussion into public mailing list.

So, I would like to contribute to QEMU emulation of CXL memory support. And I 
would like to see a TODO list. I hope this list could be useful not only for 
me. As far as I can see, we can summarize:

1) Moving towards emulation of everything we need for Dynamic Capacity.
  a) Switch CCI - have a PoC but not yet doing tunneling to Type 3 EPs.
  b) Userspace tool to fake enough FM role that we can drive dynamics 
  c) Also need to do CXL 2.0 style HP of LDs on MLD devices (some demand
  for this to drive virtualization migration use cases)
  d) DCD implementation etc on the type 3 device.
2) Lots of smaller features from CXL 3.0 such as setting up BI.
3) Enough to test P2P UIO flows - probably need to invent an accelerator
  with appropriate support to test that - DMA engine or similar.
4) Bunch of small features:
  a) Multiple HDM decoders.
  b) Poisoning.  Right now we have prototype, but it's not wired up to actually 
report poison on reads.
  c) CXL non-function map DVSEC. Given QEMU lets you add any function to a 
given device by just setting the bus to be the same as another, this is a bit 
fiddly because we need to update it late in the QEMU bring up, or possibly 
easier to do it at read time (that may well be easier).
  d) Most useful of all, but most boring perhaps is review of what's already 
waiting for upstreaming.

Please correct me if I missed something. I believe we need to have a TODO list 
to collaborate efficiently. Any ideas what else can be added to the TODO list?

Thanks,
Slava. 

> Begin forwarded message:
> 
> From: Jonathan Cameron 
> Subject: Re: [External] CXL emulation in QEMU
> Date: October 11, 2022 at 1:52:28 AM PDT
> To: Viacheslav A.Dubeyko 
> Cc: Adam Manzanares , Cong Wang 
> 
> 
> On Tue, 11 Oct 2022 09:45:50 +0100
> Jonathan Cameron  wrote:
> 
>> On Mon, 10 Oct 2022 10:11:43 -0700
>> "Viacheslav A.Dubeyko"  wrote:
>> 
>>> Hi Jonathan,
>>> 
>>> It looks like my email was confusing or, maybe, you simply missed my 
>>> email.  
>> Hi Slava,
>> 
>> 
>> Apologies, I thought from the comment you made about being fine to take it to a
>> public list that you'd send a starting email to linux-cxl or qemu-devel
>> and we'd take the discussion on there.  No problem with carrying on here
>> as nothing technical so we are fine...
>> 
>> 
>>> 
>>> My point is that I am ready to start from any feature at first. Then
>>> I will elaborate the vision what is more interesting for me. What a
>>> feature could I start to explore/implement?
>>> 
>> 
>> One thing I'm keen to get done, but haven't gotten to yet is doing a full
>> audit of spec vs what we have implemented and drawing up a todo list.
>> I can have a go at this perhaps later today.  Let's use the wiki
>> on my gitlab instance to build the list before sending it out for
>> wider review. 
>> 
>> https://gitlab.com/jic23/qemu/-/wikis/TODO-list
>> Send me an ID and I'll add you as a developer on the repo (which is
>> all you need to edit I think?)
>> 
>> I think there are a bunch of small features that we should wire up
>> that we haven't gotten to yet.
>> 
>> Examples of this include: 
>> * Multiple HDM decoders.
>> 
>> * Poisoning.  Right now we have prototype, but it's not wired up to
>> actually report poison on reads.
>> 
>> * CXL non-function map DVSEC
>>  Given QEMU lets you add any function to a given device by just setting
>>  the bus to be the same as another, this is a bit fiddly because we need
>>  to update it late in the QEMU bring up, or possibly easier to do it
>>  at read time (that may well be easier).
>> 
>> * Compliance DOE + maybe DVSEC for test capability if anyone cares about 
>> that.
>> 
>> Most useful of all, but most boring perhaps is review of what's already 
>> waiting
>> for upstreaming.  I cross post everything to linux-...@vger.kernel.org as
>> well as qemu-devel.  + there is a bunch of stuff on my gitlab tree above.
>> cxl-2022-10-08 branch though that has some cleanup needed.
>> 
>> I'm focusing short term on upstreaming what we already have + some
>> enablement to get a discussion going about how to handle open source fabric
>> manager. Primarily switch CCI as introduced in CXL 3.0/
>> 
>> 
>>> Thanks,
>>> Slava.
>>> 
 On Oct 3, 2022, at 11:12 AM, Viacheslav A.Dubeyko 
  wrote:

 Hi Jonathan,

>> 
 I don’t see any troubles to move the discussion into public mailing
 list. I simply didn’t consider all these complications  that you
 shared.   
>> 
> If we want to do it on the phone, then I'm
> sure we can borrow a bit of the regular CXL Linux sync call that Dan 
> Williams
> at Intel organizes, or we can organize something similar for the QEMU side 
> of
> things.

 It could be a good idea.

> Definitely interested to hear what sorts of features you are interested
> in + working together on getting more of CXL emulation in place.

[PATCH v2] Properly sign extend BBIT branch offset during calculation

2022-10-12 Thread Christopher Wrogg

The Octeon specific BBIT instruction incorrectly computes
the branch offset. The 16 bit value is not sign extended.

Signed-off-by: Christopher Wrogg 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1251
---
 target/mips/tcg/octeon_translate.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/target/mips/tcg/octeon_translate.c
b/target/mips/tcg/octeon_translate.c
index 6a207d2e7e..90f7b105cb 100644
--- a/target/mips/tcg/octeon_translate.c
+++ b/target/mips/tcg/octeon_translate.c
@@ -38,7 +38,10 @@ static bool trans_BBIT(DisasContext *ctx, arg_BBIT *a)
 }

 ctx->hflags |= MIPS_HFLAG_BC;
-ctx->btarget = ctx->base.pc_next + 4 + a->offset * 4;
+a->offset *= 4;
+a->offset = (target_long)(int16_t)a->offset;
+ctx->btarget = ctx->base.pc_next + 4 + a->offset;
+
 ctx->hflags |= MIPS_HFLAG_BDS32;

 tcg_temp_free(t0);
-- 
2.30.2

Re: [PULL 00/50] Block layer patches

2022-10-12 Thread Stefan Hajnoczi

Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/7.2 for any 
user-visible changes.


signature.asc
Description: PGP signature

Re: [PULL 00/28] target-arm queue

2022-10-12 Thread Stefan Hajnoczi

Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/7.2 for any 
user-visible changes.


signature.asc
Description: PGP signature

Re: [PULL 00/55] pc,virtio: features, tests, fixes, cleanups

2022-10-12 Thread Stefan Hajnoczi

Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/7.2 for any 
user-visible changes.


signature.asc
Description: PGP signature

Re: [PULL 00/55] pc,virtio: features, tests, fixes, cleanups

2022-10-12 Thread Stefan Hajnoczi

On Wed, 12 Oct 2022 at 17:00, Michael S. Tsirkin  wrote:
>
> On Wed, Oct 12, 2022 at 04:04:31PM -0400, Stefan Hajnoczi wrote:
> > On Mon, 10 Oct 2022 at 13:46, Michael S. Tsirkin  wrote:
> > >
> > > The following changes since commit 
> > > f1d33f55c47dfdaf8daacd618588ad3ae4c452d1:
> > >
> > >   Merge tag 'pull-testing-gdbstub-plugins-gitdm-061022-3' of 
> > > https://github.com/stsquad/qemu into staging (2022-10-06 07:11:56 -0400)
> > >
> > > are available in the Git repository at:
> > >
> > >   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
> >
> > Hi Michael,
> > Please update your .git/config with the https URL for future pull requests:
> >
> >   https://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git
> >
> > The pull request is signed with your GPG, so modifications should be
> > detected when verifying the signature. It still seems like a good idea
> > to use https:// when possible instead of unencrypted git://.
> >
> > Stefan
>
> I don't think this is from .git/config, this is just a parameter
> to request-pull. OK, I will switch to that.

You're right. For some reason I thought the URL was pulled from the
remote's configuration in .git/config.

Stefan

Re: [PULL 00/55] pc,virtio: features, tests, fixes, cleanups

2022-10-12 Thread Michael S. Tsirkin

On Wed, Oct 12, 2022 at 04:04:31PM -0400, Stefan Hajnoczi wrote:
> On Mon, 10 Oct 2022 at 13:46, Michael S. Tsirkin  wrote:
> >
> > The following changes since commit f1d33f55c47dfdaf8daacd618588ad3ae4c452d1:
> >
> >   Merge tag 'pull-testing-gdbstub-plugins-gitdm-061022-3' of 
> > https://github.com/stsquad/qemu into staging (2022-10-06 07:11:56 -0400)
> >
> > are available in the Git repository at:
> >
> >   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
> 
> Hi Michael,
> Please update your .git/config with the https URL for future pull requests:
> 
>   https://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git
> 
> The pull request is signed with your GPG, so modifications should be
> detected when verifying the signature. It still seems like a good idea
> to use https:// when possible instead of unencrypted git://.
> 
> Stefan

I don't think this is from .git/config, this is just a parameter
to request-pull. OK, I will switch to that.

-- 
MST

Re: [PULL 00/55] pc,virtio: features, tests, fixes, cleanups

2022-10-12 Thread Stefan Hajnoczi

On Mon, 10 Oct 2022 at 13:46, Michael S. Tsirkin  wrote:
>
> The following changes since commit f1d33f55c47dfdaf8daacd618588ad3ae4c452d1:
>
>   Merge tag 'pull-testing-gdbstub-plugins-gitdm-061022-3' of 
> https://github.com/stsquad/qemu into staging (2022-10-06 07:11:56 -0400)
>
> are available in the Git repository at:
>
>   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream

Hi Michael,
Please update your .git/config with the https URL for future pull requests:

  https://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git

The pull request is signed with your GPG, so modifications should be
detected when verifying the signature. It still seems like a good idea
to use https:// when possible instead of unencrypted git://.

Stefan

MultiFD and default channel out of order mapping on receive side.

2022-10-12 Thread manish.mishra


Hi Everyone,
Hope everyone is doing great. I have seen some live migration issues with 
qemu-4.2 when using multiFD. Signature of issue is something like this.
2022-10-01T09:57:53.972864Z qemu-kvm: failed to receive packet via multifd 
channel 0: multifd: received packet magic 5145564d expected 11223344

Basically default live migration channel packet is received on multiFD channel. 
I see an older patch explaining potential reason for this behavior.
https://lists.gnu.org/archive/html/qemu-devel/2019-10/msg05920.html
> [PATCH 3/3] migration/multifd: fix potential wrong acception order of IO.

But I see this patch was not merged. Looking at the qemu master code, I could 
not find any other patch that handles this issue either. So, as per my 
understanding, this is still a potential issue even in qemu master. I mainly 
wanted to check why this patch was dropped. Sorry if I misunderstood something. 
It would be great if someone could provide some pointers on this.

Thanks
Manish Mishra

Re: [PATCH 2/3] target/s390x: Fix emulation of the VISTR instruction

2022-10-12 Thread David Hildenbrand


On 12.10.22 20:27, Thomas Huth wrote:

The element size is encoded in the M3 field, not in the M4
field.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1248
Fixes: be6324c6b734 ("s390x/tcg: Implement VECTOR ISOLATE STRING")
Reviewed-by: Richard Henderson 
Signed-off-by: Thomas Huth 
---


Reviewed-by: David Hildenbrand 

--
Thanks,

David / dhildenb

Re: [PATCH 1/4] qom: Improve error messages when property has no getter or setter

2022-10-12 Thread David Hildenbrand


On 12.10.22 17:37, Markus Armbruster wrote:

When you try to set a property that has no setter, the error message
blames "insufficient permission":

 $ qemu-system-x86_64 -S -display none -nodefaults -monitor stdio
 QEMU 7.1.50 monitor - type 'help' for more information
 (qemu) qom-set /machine type q35
 Error: Insufficient permission to perform this operation

This implies it could work with "sufficient permission".  It can't.
Change the error message to:

 Error: Property 'pc-i440fx-7.2-machine.type' is not writable

Do the same for getting a property that has no getter.

Signed-off-by: Markus Armbruster 
---
  qom/object.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/qom/object.c b/qom/object.c
index d34608558e..e5cef30f6d 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -1383,7 +1383,8 @@ bool object_property_get(Object *obj, const char *name, 
Visitor *v,
  }
  
  if (!prop->get) {

-error_setg(errp, QERR_PERMISSION_DENIED);
+error_setg(errp, "Property '%s.%s' is not readable",
+   object_get_typename(obj), name);
  return false;
  }
  prop->get(obj, v, name, prop->opaque, );
@@ -1402,7 +1403,8 @@ bool object_property_set(Object *obj, const char *name, 
Visitor *v,
  }
  
  if (!prop->set) {

-error_setg(errp, QERR_PERMISSION_DENIED);
+error_setg(errp, "Property '%s.%s' is not writable",
+   object_get_typename(obj), name);
  return false;
  }
  prop->set(obj, v, name, prop->opaque, errp);


Much better!

Reviewed-by: David Hildenbrand 

--
Thanks,

David / dhildenb

Re: [PATCH 3/3] tests/tcg/s390x: Add a test for the vistr instruction

2022-10-12 Thread David Hildenbrand


On 12.10.22 20:27, Thomas Huth wrote:

This test can be used to verify that the change in the previous
commit is indeed fixing the problem with the M3 vs. M4 field
mixup.

Signed-off-by: Thomas Huth 
---


Reviewed-by: David Hildenbrand 

--
Thanks,

David / dhildenb

Re: [PATCH 1/3] tests/tcg/s390x: Test compiler flags only once, not every time

2022-10-12 Thread David Hildenbrand


On 12.10.22 20:27, Thomas Huth wrote:

This is common practice, see the Makefile.target in the aarch64
folder for example.

Suggested-by: Alex Bennée 
Signed-off-by: Thomas Huth 
---
  tests/tcg/s390x/Makefile.target | 27 +--
  1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index c830313e67..29c8af8207 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -1,6 +1,13 @@
  S390X_SRC=$(SRC_PATH)/tests/tcg/s390x
  VPATH+=$(S390X_SRC)
  CFLAGS+=-march=zEC12 -m64
+
+config-cc.mak: Makefile
+   $(quiet-@)( \
+   $(call cc-option,-march=z14, CROSS_CC_HAS_Z14); \
+   $(call cc-option,-march=z15, CROSS_CC_HAS_Z15)) 3> config-cc.mak
+-include config-cc.mak
+
  TESTS+=hello-s390x
  TESTS+=csst
  TESTS+=ipm
@@ -18,20 +25,20 @@ TESTS+=signals-s390x
  TESTS+=branch-relative-long
  TESTS+=noexec
  
+ifneq ($(CROSS_CC_HAS_Z14),)

  Z14_TESTS=vfminmax
  vfminmax: LDFLAGS+=-lm
  $(Z14_TESTS): CFLAGS+=-march=z14 -O2
+TESTS+=$(Z14_TESTS)
+endif
  
-TESTS+=$(if $(shell $(CC) -march=z14 -S -o /dev/null -xc /dev/null \

->/dev/null 2>&1 && echo OK),$(Z14_TESTS))
-
-VECTOR_TESTS=vxeh2_vs
-VECTOR_TESTS+=vxeh2_vcvt
-VECTOR_TESTS+=vxeh2_vlstr
-$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
-
-TESTS+=$(if $(shell $(CC) -march=z15 -S -o /dev/null -xc /dev/null \
->/dev/null 2>&1 && echo OK),$(VECTOR_TESTS))
+ifneq ($(CROSS_CC_HAS_Z15),)
+Z15_TESTS=vxeh2_vs
+Z15_TESTS+=vxeh2_vcvt
+Z15_TESTS+=vxeh2_vlstr
+$(Z15_TESTS): CFLAGS+=-march=z15 -O2
+TESTS+=$(Z15_TESTS)
+endif
  
  ifneq ($(HAVE_GDB_BIN),)

  GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py


Reviewed-by: David Hildenbrand 

--
Thanks,

David / dhildenb

Re: [PATCH 3/4] qtest: Improve error messages when property can not be set right now

2022-10-12 Thread David Hildenbrand


On 12.10.22 20:05, Thomas Huth wrote:

On 12/10/2022 17.38, Markus Armbruster wrote:

When you try to set qtest property "log" while the qtest object is
active, the error message blames "insufficient permission":

  $ qemu-system-x86_64 -S -display none -nodefaults -monitor stdio -chardev 
socket,id=chrqt0,path=qtest.socket,server=on,wait=off -object 
qtest,id=qt0,chardev=chrqt0,log=/dev/null
  QEMU 7.1.50 monitor - type 'help' for more information
  (qemu) qom-set /objects/qt0 log qtest.log
  Error: Insufficient permission to perform this operation

This implies it could work with "sufficient permission".  It can't.
Change the error message to:

  Error: Property 'log' can not be set now


Can it be set later? ... if not, that error message is almost as confusing
as the original one. Maybe it's better to tell the users *when* they can set
the property?


I assume it's mostly about "This property cannot be set." and "This 
property can no longer be set." ?


--
Thanks,

David / dhildenb

Re: [PATCH] bsd-user: Catch up with sys/param.h requirement for machine/pmap.h

2022-10-12 Thread Warner Losh

Note: This is my first patch that someone else wrote that I'm trying to
accept
and land as the bsd-user maintainer unrelated to the other upstreaming work.
Please let me know if I'm doing something wrong. I'll queue it to this
month's
bsd-user pull request (which likely will only include this patch).

Thanks!

On Wed, Oct 12, 2022 at 12:54 PM Warner Losh  wrote:

> From: Muhammad Moinur Rahman 
>
> Some versions of FreeBSD now require sys/param.h for machine/pmap.h on
> x86. Include them here to meet that requirement. It does no harm on
> older versions, so there's no need to #ifdef it.
>
> Signed-off-by:  Muhammad Moinur Rahman 
> Reviewed-by:John Baldwin 
> Signed-off-by:  Warner Losh 
> ---
>  bsd-user/host/i386/host-signal.h   | 1 +
>  bsd-user/host/x86_64/host-signal.h | 1 +
>  2 files changed, 2 insertions(+)
>
> diff --git a/bsd-user/host/i386/host-signal.h
> b/bsd-user/host/i386/host-signal.h
> index 169e61b154c..ffdfaba534a 100644
> --- a/bsd-user/host/i386/host-signal.h
> +++ b/bsd-user/host/i386/host-signal.h
> @@ -9,6 +9,7 @@
>  #ifndef I386_HOST_SIGNAL_H
>  #define I386_HOST_SIGNAL_H
>
> +#include 
>  #include 
>  #include 
>  #include 
> diff --git a/bsd-user/host/x86_64/host-signal.h
> b/bsd-user/host/x86_64/host-signal.h
> index 47ca19f8814..32ac4e41803 100644
> --- a/bsd-user/host/x86_64/host-signal.h
> +++ b/bsd-user/host/x86_64/host-signal.h
> @@ -9,6 +9,7 @@
>  #ifndef X86_64_HOST_SIGNAL_H
>  #define X86_64_HOST_SIGNAL_H
>
> +#include 
>  #include 
>  #include 
>  #include 
> --
> 2.33.1
>
>

[PATCH] bsd-user: Catch up with sys/param.h requirement for machine/pmap.h

2022-10-12 Thread Warner Losh

From: Muhammad Moinur Rahman 

Some versions of FreeBSD now require sys/param.h for machine/pmap.h on
x86. Include them here to meet that requirement. It does no harm on
older versions, so there's no need to #ifdef it.

Signed-off-by:  Muhammad Moinur Rahman 
Reviewed-by:John Baldwin 
Signed-off-by:  Warner Losh 
---
 bsd-user/host/i386/host-signal.h   | 1 +
 bsd-user/host/x86_64/host-signal.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/bsd-user/host/i386/host-signal.h b/bsd-user/host/i386/host-signal.h
index 169e61b154c..ffdfaba534a 100644
--- a/bsd-user/host/i386/host-signal.h
+++ b/bsd-user/host/i386/host-signal.h
@@ -9,6 +9,7 @@
 #ifndef I386_HOST_SIGNAL_H
 #define I386_HOST_SIGNAL_H
 
+#include 
 #include 
 #include 
 #include 
diff --git a/bsd-user/host/x86_64/host-signal.h 
b/bsd-user/host/x86_64/host-signal.h
index 47ca19f8814..32ac4e41803 100644
--- a/bsd-user/host/x86_64/host-signal.h
+++ b/bsd-user/host/x86_64/host-signal.h
@@ -9,6 +9,7 @@
 #ifndef X86_64_HOST_SIGNAL_H
 #define X86_64_HOST_SIGNAL_H
 
+#include 
 #include 
 #include 
 #include 
-- 
2.33.1

[PATCH 1/3] tests/tcg/s390x: Test compiler flags only once, not every time

2022-10-12 Thread Thomas Huth

This is common practice, see the Makefile.target in the aarch64
folder for example.

Suggested-by: Alex Bennée 
Signed-off-by: Thomas Huth 
---
 tests/tcg/s390x/Makefile.target | 27 +--
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index c830313e67..29c8af8207 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -1,6 +1,13 @@
 S390X_SRC=$(SRC_PATH)/tests/tcg/s390x
 VPATH+=$(S390X_SRC)
 CFLAGS+=-march=zEC12 -m64
+
+config-cc.mak: Makefile
+   $(quiet-@)( \
+   $(call cc-option,-march=z14, CROSS_CC_HAS_Z14); \
+   $(call cc-option,-march=z15, CROSS_CC_HAS_Z15)) 3> config-cc.mak
+-include config-cc.mak
+
 TESTS+=hello-s390x
 TESTS+=csst
 TESTS+=ipm
@@ -18,20 +25,20 @@ TESTS+=signals-s390x
 TESTS+=branch-relative-long
 TESTS+=noexec
 
+ifneq ($(CROSS_CC_HAS_Z14),)
 Z14_TESTS=vfminmax
 vfminmax: LDFLAGS+=-lm
 $(Z14_TESTS): CFLAGS+=-march=z14 -O2
+TESTS+=$(Z14_TESTS)
+endif
 
-TESTS+=$(if $(shell $(CC) -march=z14 -S -o /dev/null -xc /dev/null \
->/dev/null 2>&1 && echo OK),$(Z14_TESTS))
-
-VECTOR_TESTS=vxeh2_vs
-VECTOR_TESTS+=vxeh2_vcvt
-VECTOR_TESTS+=vxeh2_vlstr
-$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
-
-TESTS+=$(if $(shell $(CC) -march=z15 -S -o /dev/null -xc /dev/null \
->/dev/null 2>&1 && echo OK),$(VECTOR_TESTS))
+ifneq ($(CROSS_CC_HAS_Z15),)
+Z15_TESTS=vxeh2_vs
+Z15_TESTS+=vxeh2_vcvt
+Z15_TESTS+=vxeh2_vlstr
+$(Z15_TESTS): CFLAGS+=-march=z15 -O2
+TESTS+=$(Z15_TESTS)
+endif
 
 ifneq ($(HAVE_GDB_BIN),)
 GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py
-- 
2.31.1

[PATCH 3/3] tests/tcg/s390x: Add a test for the vistr instruction

2022-10-12 Thread Thomas Huth

This test can be used to verify that the change in the previous
commit is indeed fixing the problem with the M3 vs. M4 field
mixup.

Signed-off-by: Thomas Huth 
---
 tests/tcg/s390x/vistr.c | 45 +
 tests/tcg/s390x/Makefile.target |  4 +++
 2 files changed, 49 insertions(+)
 create mode 100644 tests/tcg/s390x/vistr.c

diff --git a/tests/tcg/s390x/vistr.c b/tests/tcg/s390x/vistr.c
new file mode 100644
index 00..8e3e987d71
--- /dev/null
+++ b/tests/tcg/s390x/vistr.c
@@ -0,0 +1,45 @@
+/*
+ * Test the VECTOR ISOLATE STRING (vistr) instruction
+ */
+#include 
+#include 
+#include "vx.h"
+
+static inline void vistr(S390Vector *v1, S390Vector *v2,
+ const uint8_t m3, const uint8_t m5)
+{
+asm volatile("vistr %[v1], %[v2], %[m3], %[m5]\n"
+ : [v1] "=v" (v1->v)
+ : [v2]  "v" (v2->v)
+ , [m3]  "i" (m3)
+ , [m5]  "i" (m5)
+ : "cc");
+}
+
+int main(int argc, char *argv[])
+{
+S390Vector vd = {};
+S390Vector vs16 = {
+.h[0] = 0x1234, .h[1] = 0x0056, .h[2] = 0x7800, .h[3] = 0x,
+.h[4] = 0x0078, .h[5] = 0x, .h[6] = 0x6543, .h[7] = 0x2100
+};
+S390Vector vs32 = {
+.w[0] = 0x1234, .w[1] = 0x78654300,
+.w[2] = 0x0, .w[3] = 0x12,
+};
+
+vistr(, , 1, 0);
+if (vd.h[0] != 0x1234 || vd.h[1] != 0x0056 || vd.h[2] != 0x7800 ||
+vd.h[3] || vd.h[4] || vd.h[5] || vd.h[6] || vd.h[7]) {
+puts("ERROR: vitrh failed!");
+return 1;
+}
+
+vistr(, , 2, 0);
+if (vd.w[0] != 0x1234 || vd.w[1] != 0x78654300 || vd.w[2] || vd.w[3]) {
+puts("ERROR: vitrf failed!");
+return 1;
+}
+
+return 0;
+}
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 29c8af8207..07fcc6d0ce 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -25,6 +25,10 @@ TESTS+=signals-s390x
 TESTS+=branch-relative-long
 TESTS+=noexec
 
+Z13_TESTS=vistr
+$(Z13_TESTS): CFLAGS+=-march=z13 -O2
+TESTS+=$(Z13_TESTS)
+
 ifneq ($(CROSS_CC_HAS_Z14),)
 Z14_TESTS=vfminmax
 vfminmax: LDFLAGS+=-lm
-- 
2.31.1

[PATCH 2/3] target/s390x: Fix emulation of the VISTR instruction

2022-10-12 Thread Thomas Huth

The element size is encoded in the M3 field, not in the M4
field.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1248
Fixes: be6324c6b734 ("s390x/tcg: Implement VECTOR ISOLATE STRING")
Reviewed-by: Richard Henderson 
Signed-off-by: Thomas Huth 
---
 target/s390x/tcg/translate_vx.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/s390x/tcg/translate_vx.c.inc 
b/target/s390x/tcg/translate_vx.c.inc
index 3526ba3e3b..b69c1a111c 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -2723,7 +2723,7 @@ static DisasJumpType op_vfene(DisasContext *s, DisasOps 
*o)
 
 static DisasJumpType op_vistr(DisasContext *s, DisasOps *o)
 {
-const uint8_t es = get_field(s, m4);
+const uint8_t es = get_field(s, m3);
 const uint8_t m5 = get_field(s, m5);
 static gen_helper_gvec_2 * const g[3] = {
 gen_helper_gvec_vistr8,
-- 
2.31.1

[PATCH 4/5] hw/mem/cxl_type3: Change the CDAT allocation/free strategy

2022-10-12 Thread Gregory Price

The existing code allocates a subtable for SLBIS entries, uses a
local variable to avoid a g_autofree footgun, and the cleanup code
causes heap corruption.

Rather than allocate a table, explicitly allocate each individual entry
and make the sub-table size static.

Signed-off-by: Gregory Price 
---
 hw/mem/cxl_type3.c | 49 --
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 0e0ea70387..220b9f09a9 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -23,13 +23,14 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 void *priv)
 {
 g_autofree CDATDsmas *dsmas_nonvolatile = NULL;
-g_autofree CDATDslbis *dslbis_nonvolatile = NULL;
+g_autofree CDATDslbis *dslbis_nonvolatile1 = NULL;
+g_autofree CDATDslbis *dslbis_nonvolatile2 = NULL;
+g_autofree CDATDslbis *dslbis_nonvolatile3 = NULL;
+g_autofree CDATDslbis *dslbis_nonvolatile4 = NULL;
 g_autofree CDATDsemts *dsemts_nonvolatile = NULL;
 CXLType3Dev *ct3d = priv;
-int i = 0;
 int next_dsmad_handle = 0;
 int nonvolatile_dsmad = -1;
-int dslbis_nonvolatile_num = 4;
 MemoryRegion *mr;
 
 if (!ct3d->hostmem) {
@@ -48,10 +49,15 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 
 /* Non volatile aspects */
 dsmas_nonvolatile = g_malloc(sizeof(*dsmas_nonvolatile));
-dslbis_nonvolatile =
-g_malloc(sizeof(*dslbis_nonvolatile) * dslbis_nonvolatile_num);
+dslbis_nonvolatile1 = g_malloc(sizeof(*dslbis_nonvolatile1));
+dslbis_nonvolatile2 = g_malloc(sizeof(*dslbis_nonvolatile2));
+dslbis_nonvolatile3 = g_malloc(sizeof(*dslbis_nonvolatile3));
+dslbis_nonvolatile4 = g_malloc(sizeof(*dslbis_nonvolatile4));
 dsemts_nonvolatile = g_malloc(sizeof(*dsemts_nonvolatile));
-if (!dsmas_nonvolatile || !dslbis_nonvolatile || !dsemts_nonvolatile) {
+
+if (!dsmas_nonvolatile || !dsemts_nonvolatile ||
+!dslbis_nonvolatile1 || !dslbis_nonvolatile2 ||
+!dslbis_nonvolatile3 || !dslbis_nonvolatile4) {
 g_free(*cdat_table);
 *cdat_table = NULL;
 return -ENOMEM;
@@ -70,10 +76,10 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 };
 
 /* For now, no memory side cache, plausiblish numbers */
-dslbis_nonvolatile[0] = (CDATDslbis) {
+*dslbis_nonvolatile1 = (CDATDslbis) {
 .header = {
 .type = CDAT_TYPE_DSLBIS,
-.length = sizeof(*dslbis_nonvolatile),
+.length = sizeof(*dslbis_nonvolatile1),
 },
 .handle = nonvolatile_dsmad,
 .flags = HMAT_LB_MEM_MEMORY,
@@ -82,10 +88,10 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 .entry[0] = 15, /* 150ns */
 };
 
-dslbis_nonvolatile[1] = (CDATDslbis) {
+*dslbis_nonvolatile2 = (CDATDslbis) {
 .header = {
 .type = CDAT_TYPE_DSLBIS,
-.length = sizeof(*dslbis_nonvolatile),
+.length = sizeof(*dslbis_nonvolatile2),
 },
 .handle = nonvolatile_dsmad,
 .flags = HMAT_LB_MEM_MEMORY,
@@ -94,10 +100,10 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table,
 .entry[0] = 25, /* 250ns */
 };
 
-dslbis_nonvolatile[2] = (CDATDslbis) {
+*dslbis_nonvolatile3 = (CDATDslbis) {
 .header = {
 .type = CDAT_TYPE_DSLBIS,
-.length = sizeof(*dslbis_nonvolatile),
+.length = sizeof(*dslbis_nonvolatile3),
 },
 .handle = nonvolatile_dsmad,
 .flags = HMAT_LB_MEM_MEMORY,
@@ -106,10 +112,10 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table,
 .entry[0] = 16,
 };
 
-dslbis_nonvolatile[3] = (CDATDslbis) {
+*dslbis_nonvolatile4 = (CDATDslbis) {
 .header = {
 .type = CDAT_TYPE_DSLBIS,
-.length = sizeof(*dslbis_nonvolatile),
+.length = sizeof(*dslbis_nonvolatile4),
 },
 .handle = nonvolatile_dsmad,
 .flags = HMAT_LB_MEM_MEMORY,
@@ -131,15 +137,12 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table,
 };
 
 /* Header always at start of structure */
-(*cdat_table)[i++] = g_steal_pointer(_nonvolatile);
-
-CDATDslbis *dslbis = g_steal_pointer(_nonvolatile);
-int j;
-for (j = 0; j < dslbis_nonvolatile_num; j++) {
-(*cdat_table)[i++] = (CDATSubHeader *)[j];
-}
-
-(*cdat_table)[i++] = g_steal_pointer(_nonvolatile);
+(*cdat_table)[0] = g_steal_pointer(_nonvolatile);
+(*cdat_table)[1] = (CDATSubHeader *)g_steal_pointer(_nonvolatile1);
+(*cdat_table)[2] = (CDATSubHeader *)g_steal_pointer(_nonvolatile2);
+(*cdat_table)[3] = (CDATSubHeader *)g_steal_pointer(_nonvolatile3);
+(*cdat_table)[4] = (CDATSubHeader *)g_steal_pointer(_nonvolatile4);
+(*cdat_table)[5] = g_steal_pointer(_nonvolatile);
 
 return CT3_CDAT_SUBTABLE_SIZE;
 }
--

[PATCH 0/3] s390x: Fix for vistr instruction & addition to TCG tests

2022-10-12 Thread Thomas Huth

1) Improve tests/tcg/s390x/Makefile.target to look for -march flags only once
2) Fix a problem with the vistr instruction
3) Add a test for the vistr instruction

Thomas Huth (3):
  tests/tcg/s390x: Test compiler flags only once, not every time
  target/s390x: Fix emulation of the VISTR instruction
  tests/tcg/s390x: Add a test for the vistr instruction

 tests/tcg/s390x/vistr.c | 45 +
 target/s390x/tcg/translate_vx.c.inc |  2 +-
 tests/tcg/s390x/Makefile.target | 31 +---
 3 files changed, 67 insertions(+), 11 deletions(-)
 create mode 100644 tests/tcg/s390x/vistr.c

-- 
2.31.1

[PATCH 3/5] hw/mem/cxl_type3: CDAT pre-allocate and check resources prior to work

2022-10-12 Thread Gregory Price

Makes the size of the allocated cdat table static (6 entries),
flattens the code, and reduces the number of exit conditions

Signed-off-by: Gregory Price 
---
 hw/mem/cxl_type3.c | 52 --
 1 file changed, 22 insertions(+), 30 deletions(-)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 43b2b9e041..0e0ea70387 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -17,6 +17,7 @@
 #include "hw/pci/msix.h"
 
 #define DWORD_BYTE 4
+#define CT3_CDAT_SUBTABLE_SIZE 6
 
 static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 void *priv)
@@ -25,7 +26,6 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 g_autofree CDATDslbis *dslbis_nonvolatile = NULL;
 g_autofree CDATDsemts *dsemts_nonvolatile = NULL;
 CXLType3Dev *ct3d = priv;
-int len = 0;
 int i = 0;
 int next_dsmad_handle = 0;
 int nonvolatile_dsmad = -1;
@@ -33,7 +33,7 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 MemoryRegion *mr;
 
 if (!ct3d->hostmem) {
-return len;
+return 0;
 }
 
 mr = host_memory_backend_get_memory(ct3d->hostmem);
@@ -41,11 +41,22 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 return -EINVAL;
 }
 
+*cdat_table = g_malloc0(CT3_CDAT_SUBTABLE_SIZE * sizeof(*cdat_table));
+if (!*cdat_table) {
+return -ENOMEM;
+}
+
 /* Non volatile aspects */
 dsmas_nonvolatile = g_malloc(sizeof(*dsmas_nonvolatile));
-if (!dsmas_nonvolatile) {
+dslbis_nonvolatile =
+g_malloc(sizeof(*dslbis_nonvolatile) * dslbis_nonvolatile_num);
+dsemts_nonvolatile = g_malloc(sizeof(*dsemts_nonvolatile));
+if (!dsmas_nonvolatile || !dslbis_nonvolatile || !dsemts_nonvolatile) {
+g_free(*cdat_table);
+*cdat_table = NULL;
 return -ENOMEM;
 }
+
 nonvolatile_dsmad = next_dsmad_handle++;
 *dsmas_nonvolatile = (CDATDsmas) {
 .header = {
@@ -57,15 +68,8 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 .DPA_base = 0,
 .DPA_length = int128_get64(mr->size),
 };
-len++;
 
 /* For now, no memory side cache, plausiblish numbers */
-dslbis_nonvolatile =
-g_malloc(sizeof(*dslbis_nonvolatile) * dslbis_nonvolatile_num);
-if (!dslbis_nonvolatile) {
-return -ENOMEM;
-}
-
 dslbis_nonvolatile[0] = (CDATDslbis) {
 .header = {
 .type = CDAT_TYPE_DSLBIS,
@@ -77,7 +81,6 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 .entry_base_unit = 1, /* 10ns base */
 .entry[0] = 15, /* 150ns */
 };
-len++;
 
 dslbis_nonvolatile[1] = (CDATDslbis) {
 .header = {
@@ -90,7 +93,6 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 .entry_base_unit = 1,
 .entry[0] = 25, /* 250ns */
 };
-len++;
 
 dslbis_nonvolatile[2] = (CDATDslbis) {
 .header = {
@@ -103,7 +105,6 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 .entry_base_unit = 1000, /* GB/s */
 .entry[0] = 16,
 };
-len++;
 
 dslbis_nonvolatile[3] = (CDATDslbis) {
 .header = {
@@ -116,9 +117,7 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 .entry_base_unit = 1000, /* GB/s */
 .entry[0] = 16,
 };
-len++;
 
-dsemts_nonvolatile = g_malloc(sizeof(*dsemts_nonvolatile));
 *dsemts_nonvolatile = (CDATDsemts) {
 .header = {
 .type = CDAT_TYPE_DSEMTS,
@@ -130,26 +129,19 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table,
 .DPA_offset = 0,
 .DPA_length = int128_get64(mr->size),
 };
-len++;
 
-*cdat_table = g_malloc0(len * sizeof(*cdat_table));
 /* Header always at start of structure */
-if (dsmas_nonvolatile) {
-(*cdat_table)[i++] = g_steal_pointer(_nonvolatile);
-}
-if (dslbis_nonvolatile) {
-CDATDslbis *dslbis = g_steal_pointer(_nonvolatile);
-int j;
+(*cdat_table)[i++] = g_steal_pointer(_nonvolatile);
 
-for (j = 0; j < dslbis_nonvolatile_num; j++) {
-(*cdat_table)[i++] = (CDATSubHeader *)[j];
-}
-}
-if (dsemts_nonvolatile) {
-(*cdat_table)[i++] = g_steal_pointer(_nonvolatile);
+CDATDslbis *dslbis = g_steal_pointer(_nonvolatile);
+int j;
+for (j = 0; j < dslbis_nonvolatile_num; j++) {
+(*cdat_table)[i++] = (CDATSubHeader *)[j];
 }
 
-return len;
+(*cdat_table)[i++] = g_steal_pointer(_nonvolatile);
+
+return CT3_CDAT_SUBTABLE_SIZE;
 }
 
 static void ct3_free_cdat_table(CDATSubHeader **cdat_table, int num, void 
*priv)
-- 
2.37.3

[PATCH 2/5] hw/mem/cxl_type3: Pull validation checks ahead of functional code

2022-10-12 Thread Gregory Price

For style - pulling these validations ahead flattens the code.

Signed-off-by: Gregory Price 
---
 hw/mem/cxl_type3.c | 193 ++---
 1 file changed, 96 insertions(+), 97 deletions(-)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 94bc439d89..43b2b9e041 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -32,107 +32,106 @@ static int ct3_build_cdat_table(CDATSubHeader 
***cdat_table,
 int dslbis_nonvolatile_num = 4;
 MemoryRegion *mr;
 
+if (!ct3d->hostmem) {
+return len;
+}
+
+mr = host_memory_backend_get_memory(ct3d->hostmem);
+if (!mr) {
+return -EINVAL;
+}
+
 /* Non volatile aspects */
-if (ct3d->hostmem) {
-dsmas_nonvolatile = g_malloc(sizeof(*dsmas_nonvolatile));
-if (!dsmas_nonvolatile) {
-return -ENOMEM;
-}
-nonvolatile_dsmad = next_dsmad_handle++;
-mr = host_memory_backend_get_memory(ct3d->hostmem);
-if (!mr) {
-return -EINVAL;
-}
-*dsmas_nonvolatile = (CDATDsmas) {
-.header = {
-.type = CDAT_TYPE_DSMAS,
-.length = sizeof(*dsmas_nonvolatile),
-},
-.DSMADhandle = nonvolatile_dsmad,
-.flags = CDAT_DSMAS_FLAG_NV,
-.DPA_base = 0,
-.DPA_length = int128_get64(mr->size),
-};
-len++;
-
-/* For now, no memory side cache, plausiblish numbers */
-dslbis_nonvolatile =
-g_malloc(sizeof(*dslbis_nonvolatile) * dslbis_nonvolatile_num);
-if (!dslbis_nonvolatile) {
-return -ENOMEM;
-}
+dsmas_nonvolatile = g_malloc(sizeof(*dsmas_nonvolatile));
+if (!dsmas_nonvolatile) {
+return -ENOMEM;
+}
+nonvolatile_dsmad = next_dsmad_handle++;
+*dsmas_nonvolatile = (CDATDsmas) {
+.header = {
+.type = CDAT_TYPE_DSMAS,
+.length = sizeof(*dsmas_nonvolatile),
+},
+.DSMADhandle = nonvolatile_dsmad,
+.flags = CDAT_DSMAS_FLAG_NV,
+.DPA_base = 0,
+.DPA_length = int128_get64(mr->size),
+};
+len++;
 
-dslbis_nonvolatile[0] = (CDATDslbis) {
-.header = {
-.type = CDAT_TYPE_DSLBIS,
-.length = sizeof(*dslbis_nonvolatile),
-},
-.handle = nonvolatile_dsmad,
-.flags = HMAT_LB_MEM_MEMORY,
-.data_type = HMAT_LB_DATA_READ_LATENCY,
-.entry_base_unit = 1, /* 10ns base */
-.entry[0] = 15, /* 150ns */
-};
-len++;
-
-dslbis_nonvolatile[1] = (CDATDslbis) {
-.header = {
-.type = CDAT_TYPE_DSLBIS,
-.length = sizeof(*dslbis_nonvolatile),
-},
-.handle = nonvolatile_dsmad,
-.flags = HMAT_LB_MEM_MEMORY,
-.data_type = HMAT_LB_DATA_WRITE_LATENCY,
-.entry_base_unit = 1,
-.entry[0] = 25, /* 250ns */
-};
-len++;
-
-dslbis_nonvolatile[2] = (CDATDslbis) {
-.header = {
-.type = CDAT_TYPE_DSLBIS,
-.length = sizeof(*dslbis_nonvolatile),
-},
-.handle = nonvolatile_dsmad,
-.flags = HMAT_LB_MEM_MEMORY,
-.data_type = HMAT_LB_DATA_READ_BANDWIDTH,
-.entry_base_unit = 1000, /* GB/s */
-.entry[0] = 16,
-};
-len++;
-
-dslbis_nonvolatile[3] = (CDATDslbis) {
-.header = {
-.type = CDAT_TYPE_DSLBIS,
-.length = sizeof(*dslbis_nonvolatile),
-},
-.handle = nonvolatile_dsmad,
-.flags = HMAT_LB_MEM_MEMORY,
-.data_type = HMAT_LB_DATA_WRITE_BANDWIDTH,
-.entry_base_unit = 1000, /* GB/s */
-.entry[0] = 16,
-};
-len++;
-
-mr = host_memory_backend_get_memory(ct3d->hostmem);
-if (!mr) {
-return -EINVAL;
-}
-dsemts_nonvolatile = g_malloc(sizeof(*dsemts_nonvolatile));
-*dsemts_nonvolatile = (CDATDsemts) {
-.header = {
-.type = CDAT_TYPE_DSEMTS,
-.length = sizeof(*dsemts_nonvolatile),
-},
-.DSMAS_handle = nonvolatile_dsmad,
-/* Reserved - the non volatile from DSMAS matters */
-.EFI_memory_type_attr = 2,
-.DPA_offset = 0,
-.DPA_length = int128_get64(mr->size),
-};
-len++;
+/* For now, no memory side cache, plausiblish numbers */
+dslbis_nonvolatile =
+g_malloc(sizeof(*dslbis_nonvolatile) * dslbis_nonvolatile_num);
+if (!dslbis_nonvolatile) {
+return -ENOMEM;
 }
 
+dslbis_nonvolatile[0] = (CDATDslbis) {
+.header = {
+.type = CDAT_TYPE_DSLBIS,
+.length = sizeof(*dslbis_nonvolatile),
+},
+.handle =

Re: [PATCH v7 4/5] hw/mem/cxl-type3: Add CXL CDAT Data Object Exchange

2022-10-12 Thread Gregory Price

Included in this response is a recommended patch set on top of this
patch that resolves a number of issues, including style and a heap
corruption bug.

The purpose of this patch set is to refactor the CDAT initialization
code to support future patch sets that will introduce multi-region
support in CXL Type3 devices.

1) Checkpatch errors in the immediately prior patch
2) Flattening of code in CDAT initialization
3) Changes in allocation and error checking for cleanliness
4) Change in the allocation/free strategy of CDAT sub-tables to simplify
   multi-region allocation in the future.  Also resolves a heap
   corruption bug
5) Refactor of CDAT initialization code into a function that initializes
   sub-tables per memory-region.

Gregory Price (5):
  hw/mem/cxl_type3: fix checkpatch errors
  hw/mem/cxl_type3: Pull validation checks ahead of functional code
  hw/mem/cxl_type3: CDAT pre-allocate and check resources prior to work
  hw/mem/cxl_type3: Change the CDAT allocation/free strategy
  hw/mem/cxl_type3: Refactor CDAT sub-table entry initialization into a
function

 hw/mem/cxl_type3.c | 240 +++--
 1 file changed, 122 insertions(+), 118 deletions(-)

-- 
2.37.3

[PATCH 5/5] hw/mem/cxl_type3: Refactor CDAT sub-table entry initialization into a function

2022-10-12 Thread Gregory Price

The CDAT can contain multiple entries for multiple memory regions, this
will allow us to re-use the initialization code when volatile memory
region support is added.

Signed-off-by: Gregory Price 
---
 hw/mem/cxl_type3.c | 137 -
 1 file changed, 72 insertions(+), 65 deletions(-)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 220b9f09a9..3c5485abd0 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -19,117 +19,93 @@
 #define DWORD_BYTE 4
 #define CT3_CDAT_SUBTABLE_SIZE 6
 
-static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
-void *priv)
+static int ct3_build_cdat_subtable(CDATSubHeader **cdat_table,
+MemoryRegion *mr, int dsmad_handle)
 {
-g_autofree CDATDsmas *dsmas_nonvolatile = NULL;
-g_autofree CDATDslbis *dslbis_nonvolatile1 = NULL;
-g_autofree CDATDslbis *dslbis_nonvolatile2 = NULL;
-g_autofree CDATDslbis *dslbis_nonvolatile3 = NULL;
-g_autofree CDATDslbis *dslbis_nonvolatile4 = NULL;
-g_autofree CDATDsemts *dsemts_nonvolatile = NULL;
-CXLType3Dev *ct3d = priv;
-int next_dsmad_handle = 0;
-int nonvolatile_dsmad = -1;
-MemoryRegion *mr;
-
-if (!ct3d->hostmem) {
-return 0;
-}
-
-mr = host_memory_backend_get_memory(ct3d->hostmem);
-if (!mr) {
-return -EINVAL;
-}
-
-*cdat_table = g_malloc0(CT3_CDAT_SUBTABLE_SIZE * sizeof(*cdat_table));
-if (!*cdat_table) {
-return -ENOMEM;
-}
-
-/* Non volatile aspects */
-dsmas_nonvolatile = g_malloc(sizeof(*dsmas_nonvolatile));
-dslbis_nonvolatile1 = g_malloc(sizeof(*dslbis_nonvolatile1));
-dslbis_nonvolatile2 = g_malloc(sizeof(*dslbis_nonvolatile2));
-dslbis_nonvolatile3 = g_malloc(sizeof(*dslbis_nonvolatile3));
-dslbis_nonvolatile4 = g_malloc(sizeof(*dslbis_nonvolatile4));
-dsemts_nonvolatile = g_malloc(sizeof(*dsemts_nonvolatile));
-
-if (!dsmas_nonvolatile || !dsemts_nonvolatile ||
-!dslbis_nonvolatile1 || !dslbis_nonvolatile2 ||
-!dslbis_nonvolatile3 || !dslbis_nonvolatile4) {
-g_free(*cdat_table);
-*cdat_table = NULL;
+g_autofree CDATDsmas *dsmas = NULL;
+g_autofree CDATDslbis *dslbis1 = NULL;
+g_autofree CDATDslbis *dslbis2 = NULL;
+g_autofree CDATDslbis *dslbis3 = NULL;
+g_autofree CDATDslbis *dslbis4 = NULL;
+g_autofree CDATDsemts *dsemts = NULL;
+
+dsmas = g_malloc(sizeof(*dsmas));
+dslbis1 = g_malloc(sizeof(*dslbis1));
+dslbis2 = g_malloc(sizeof(*dslbis2));
+dslbis3 = g_malloc(sizeof(*dslbis3));
+dslbis4 = g_malloc(sizeof(*dslbis4));
+dsemts = g_malloc(sizeof(*dsemts));
+
+if (!dsmas || !dslbis1 || !dslbis2 || !dslbis3 || !dslbis4 || !dsemts) {
 return -ENOMEM;
 }
 
-nonvolatile_dsmad = next_dsmad_handle++;
-*dsmas_nonvolatile = (CDATDsmas) {
+*dsmas = (CDATDsmas) {
 .header = {
 .type = CDAT_TYPE_DSMAS,
-.length = sizeof(*dsmas_nonvolatile),
+.length = sizeof(*dsmas),
 },
-.DSMADhandle = nonvolatile_dsmad,
+.DSMADhandle = dsmad_handle,
 .flags = CDAT_DSMAS_FLAG_NV,
 .DPA_base = 0,
 .DPA_length = int128_get64(mr->size),
 };
 
 /* For now, no memory side cache, plausiblish numbers */
-*dslbis_nonvolatile1 = (CDATDslbis) {
+*dslbis1 = (CDATDslbis) {
 .header = {
 .type = CDAT_TYPE_DSLBIS,
-.length = sizeof(*dslbis_nonvolatile1),
+.length = sizeof(*dslbis1),
 },
-.handle = nonvolatile_dsmad,
+.handle = dsmad_handle,
 .flags = HMAT_LB_MEM_MEMORY,
 .data_type = HMAT_LB_DATA_READ_LATENCY,
 .entry_base_unit = 1, /* 10ns base */
 .entry[0] = 15, /* 150ns */
 };
 
-*dslbis_nonvolatile2 = (CDATDslbis) {
+*dslbis2 = (CDATDslbis) {
 .header = {
 .type = CDAT_TYPE_DSLBIS,
-.length = sizeof(*dslbis_nonvolatile2),
+.length = sizeof(*dslbis2),
 },
-.handle = nonvolatile_dsmad,
+.handle = dsmad_handle,
 .flags = HMAT_LB_MEM_MEMORY,
 .data_type = HMAT_LB_DATA_WRITE_LATENCY,
 .entry_base_unit = 1,
 .entry[0] = 25, /* 250ns */
 };
 
-*dslbis_nonvolatile3 = (CDATDslbis) {
+*dslbis3 = (CDATDslbis) {
 .header = {
 .type = CDAT_TYPE_DSLBIS,
-.length = sizeof(*dslbis_nonvolatile3),
+.length = sizeof(*dslbis3),
 },
-.handle = nonvolatile_dsmad,
+.handle = dsmad_handle,
 .flags = HMAT_LB_MEM_MEMORY,
 .data_type = HMAT_LB_DATA_READ_BANDWIDTH,
 .entry_base_unit = 1000, /* GB/s */
 .entry[0] = 16,
 };
 
-*dslbis_nonvolatile4 = (CDATDslbis) {
+*dslbis4 = (CDATDslbis) {
 .header = {
 .type = CDAT_TYPE_DSLBIS,
-.length = sizeof(*dslbis_nonvolatile4),
+.length

[PATCH 1/5] hw/mem/cxl_type3: fix checkpatch errors

2022-10-12 Thread Gregory Price

This fixes checkpatch errors in the prior commit.

Signed-off-by: Gregory Price 
---
 hw/mem/cxl_type3.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 3fa5d70662..94bc439d89 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -56,9 +56,11 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 len++;
 
 /* For now, no memory side cache, plausiblish numbers */
-dslbis_nonvolatile = g_malloc(sizeof(*dslbis_nonvolatile) * 
dslbis_nonvolatile_num);
-if (!dslbis_nonvolatile)
+dslbis_nonvolatile =
+g_malloc(sizeof(*dslbis_nonvolatile) * dslbis_nonvolatile_num);
+if (!dslbis_nonvolatile) {
 return -ENOMEM;
+}
 
 dslbis_nonvolatile[0] = (CDATDslbis) {
 .header = {
@@ -85,7 +87,7 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 .entry[0] = 25, /* 250ns */
 };
 len++;
-   
+
 dslbis_nonvolatile[2] = (CDATDslbis) {
 .header = {
 .type = CDAT_TYPE_DSLBIS,
@@ -123,7 +125,8 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 .length = sizeof(*dsemts_nonvolatile),
 },
 .DSMAS_handle = nonvolatile_dsmad,
-.EFI_memory_type_attr = 2, /* Reserved - the non volatile from 
DSMAS matters */
+/* Reserved - the non volatile from DSMAS matters */
+.EFI_memory_type_attr = 2,
 .DPA_offset = 0,
 .DPA_length = int128_get64(mr->size),
 };
@@ -136,7 +139,7 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 (*cdat_table)[i++] = g_steal_pointer(_nonvolatile);
 }
 if (dslbis_nonvolatile) {
-CDATDslbis *dslbis = g_steal_pointer(_nonvolatile);
+CDATDslbis *dslbis = g_steal_pointer(_nonvolatile);
 int j;
 
 for (j = 0; j < dslbis_nonvolatile_num; j++) {
@@ -146,7 +149,7 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
 if (dsemts_nonvolatile) {
 (*cdat_table)[i++] = g_steal_pointer(_nonvolatile);
 }
-
+
 return len;
 }
 
-- 
2.37.3

Re: [PATCH RFC 0/2] qemu-thread: Strict unlock check

2022-10-12 Thread Peter Xu

On Tue, Oct 11, 2022 at 06:41:52PM -0400, Peter Xu wrote:
> NOTE: mark patchset RFC because "make check" will easily fail; but I didn't
> yet dig into why as I'm not familiar with the code paths that triggers, it
> can be bugs hidden or something I missed.  So RFC to just have some thoughts.

I just noticed (after being reminded by Dave) that the reclock was actually
the recursive lock, which definitely won't work with patch 2 at all.

OTOH I also noticed PTHREAD_MUTEX_ERRORCHECK which does the same unlock
check that we can leverage (and it'll also check re-lock from the same
thread which causes deadlock).  I'll give that a shot instead.

Please ignore this version.  Patch 1 is still meaningful I think, but
anyway I'll repost.  Sorry for the noise.

-- 
Peter Xu

Re: [PATCH 3/4] qtest: Improve error messages when property can not be set right now

2022-10-12 Thread Thomas Huth


On 12/10/2022 17.38, Markus Armbruster wrote:

When you try to set qtest property "log" while the qtest object is
active, the error message blames "insufficient permission":

 $ qemu-system-x86_64 -S -display none -nodefaults -monitor stdio -chardev 
socket,id=chrqt0,path=qtest.socket,server=on,wait=off -object 
qtest,id=qt0,chardev=chrqt0,log=/dev/null
 QEMU 7.1.50 monitor - type 'help' for more information
 (qemu) qom-set /objects/qt0 log qtest.log
 Error: Insufficient permission to perform this operation

This implies it could work with "sufficient permission".  It can't.
Change the error message to:

 Error: Property 'log' can not be set now


Can it be set later? ... if not, that error message is almost as confusing 
as the original one. Maybe it's better to tell the users *when* they can set 
the property?


 Thomas

Re: [PATCH v2 06/15] migration: Yield bitmap_mutex properly when sending/sleeping

2022-10-12 Thread Peter Xu

On Wed, Oct 12, 2022 at 05:43:53PM +0100, Dr. David Alan Gilbert wrote:
> * Peter Xu (pet...@redhat.com) wrote:
> > Don't take the bitmap mutex when sending pages, or when being throttled by
> > migration_rate_limit() (which is a bit tricky to call it here in ram code,
> > but seems still helpful).
> > 
> > It prepares for the possibility of concurrently sending pages in >1 threads
> > using the function ram_save_host_page() because all threads may need the
> > bitmap_mutex to operate on bitmaps, so that either sendmsg() or any kind of
> > qemu_sem_wait() blocking for one thread will not block the other from
> > progressing.
> > 
> > Signed-off-by: Peter Xu 
> 
> Reviewed-by: Dr. David Alan Gilbert 
> 
> although a comment above the declaration of ram_save_host_page saying
> it can drop the lock would be very good.

Let me add that.  Thanks,

-- 
Peter Xu

[PATCH] hw/virtio/virtio-iommu-pci: Enforce the device is plugged on the root bus

2022-10-12 Thread Eric Auger

In theory the virtio-iommu-pci could be plugged anywhere in the PCIe
topology and as long as the dt/acpi info are properly built this should
work. However at the moment we fail to do that because the
virtio-iommu-pci BDF is not computed at plug time and in that case
vms->virtio_iommu_bdf gets an incorrect value.

For instance if the virtio-iommu-pci is plugged onto a pcie root port
and the virtio-iommu protects a virtio-block-pci device the guest does
not boot.

So let's not pretend we support this case, and fail the initialize()
if we detect the virtio-iommu-pci is plugged anywhere else than on the
root bus. Anyway this ability is not needed.

Signed-off-by: Eric Auger 
---
 hw/virtio/virtio-iommu-pci.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c
index 79ea8334f0..7ef2f9dcdb 100644
--- a/hw/virtio/virtio-iommu-pci.c
+++ b/hw/virtio/virtio-iommu-pci.c
@@ -17,6 +17,7 @@
 #include "hw/qdev-properties-system.h"
 #include "qapi/error.h"
 #include "hw/boards.h"
+#include "hw/pci/pci_bus.h"
 #include "qom/object.h"
 
 typedef struct VirtIOIOMMUPCI VirtIOIOMMUPCI;
@@ -44,6 +45,7 @@ static Property virtio_iommu_pci_properties[] = {
 static void virtio_iommu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
 {
 VirtIOIOMMUPCI *dev = VIRTIO_IOMMU_PCI(vpci_dev);
+PCIBus *pbus = pci_get_bus(_dev->pci_dev);
 DeviceState *vdev = DEVICE(>vdev);
 VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
 
@@ -57,11 +59,17 @@ static void virtio_iommu_pci_realize(VirtIOPCIProxy 
*vpci_dev, Error **errp)
 s->reserved_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_MSI) {
 error_setg(errp, "reserved region %d has an invalid type", i);
 error_append_hint(errp, "Valid values are 0 and 1\n");
+return;
 }
 }
+if (!pci_bus_is_root(pbus)) {
+error_setg(errp, "virtio-iommu-pci must be plugged on the root bus");
+return;
+}
+
 object_property_set_link(OBJECT(dev), "primary-bus",
- OBJECT(pci_get_bus(_dev->pci_dev)),
- _abort);
+ OBJECT(pbus), _abort);
+
 virtio_pci_force_virtio_1(vpci_dev);
 qdev_realize(vdev, BUS(_dev->bus), errp);
 }
-- 
2.31.1

[PATCH v10 9/9] docs/s390x: document s390x cpu topology

2022-10-12 Thread Pierre Morel

Add some basic examples for the definition of cpu topology
in s390x.

Signed-off-by: Pierre Morel 
---
 docs/system/s390x/cpu_topology.rst | 80 ++
 1 file changed, 80 insertions(+)
 create mode 100644 docs/system/s390x/cpu_topology.rst

diff --git a/docs/system/s390x/cpu_topology.rst 
b/docs/system/s390x/cpu_topology.rst
new file mode 100644
index 00..1dcd24cbbc
--- /dev/null
+++ b/docs/system/s390x/cpu_topology.rst
@@ -0,0 +1,80 @@
+CPU Topology on s390x
+=
+
+CPU Topology on S390x provides up to 5 levels of topology containers:
+nodes, drawers, books, sockets and CPUs.
+While the higher level containers, Containers Topology List Entries,
+(Containers TLE) define a tree hierarchy, the lowest level of topology
+definition, the CPU Topology List Entry (CPU TLE), provides the placement
+of the CPUs inside the parent container.
+
+Currently QEMU CPU topology uses a single level of container: the sockets.
+
+For backward compatibility, threads can be declared on the ``-smp`` command
+line. They will be seen as CPUs by the guest as long as multithreading
+is not really supported by QEMU for S390.
+
+Prerequisites
+-
+
+To use CPU Topology a Linux QEMU/KVM machine providing the CPU Topology 
facility
+(STFLE bit 11) is required.
+
+However, since this facility has been enabled by default in an early version
+of QEMU, we use a capability, ``KVM_CAP_S390_CPU_TOPOLOGY``, to notify KVM
+that QEMU uses the CPU Topology.
+
+Indicating the CPU topology to the Virtual Machine
+--
+
+The CPU Topology can be specified on the QEMU command line
+with the ``-smp`` or the ``-device`` qemu command arguments.
+
+For example:
+
+.. code-block:: sh
+-smp cpus=5,sockets=8,cores=2,threads=2,maxcpus=32
+-device host-s390x-cpu,core-id=14
+
+New CPUs can be plugged using the device_add hmp command like in:
+
+.. code-block:: sh
+   (qemu) device_add host-s390x-cpu,core-id=9
+
+The core-id defines the placement of the core in the topology by
+starting with core 0 in socket 0 up to maxcpus.
+
+In the example above:
+
+* There are 5 cpus provided to the guest with the ``-smp`` command line
+  They will take the core-ids 0,1,2,3,4
+  As we have 2 threads in 2 cores in a socket, we have 4 cpus provided
+  to the guest in socket 0, with core-ids 0,1,2,3.
+  The last cpu, with core-id 4, will be on socket 1.
+
+* the core with ID 14 provided by the ``-device`` command line will
+  be placed in socket 3, with core-id 14
+
+* the core with ID 9 provided by the ``device_add`` hmp command will
+  be placed in socket 2, with core-id 9
+
+Note that the core ID is machine wide and the CPU TLE masks provided
+by the STSI instruction will be:
+
+* in socket 0: 0xf000 (core id 0,1,2,3)
+* in socket 2: 0x0040 (core id 9)
+* in socket 3: 0x0002 (core id 14)
+
+Migration
+-
+
+For virtio-ccw machines older than s390-ccw-virtio-7.3, CPU Topology is
+unavailable.
+
+CPU Topology is by default enabled for s390-ccw-virtio-7.3 and newer machines.
+
+Disabling CPU topology can be done by setting the global option
+``topology-disable`` to ``on`` like in:
+
+.. code-block:: sh
+   -machine s390-ccw-virtio-7.3,accel=kvm,topology-disable=on
-- 
2.31.1

Re: [PATCH v2 06/15] migration: Yield bitmap_mutex properly when sending/sleeping

2022-10-12 Thread Dr. David Alan Gilbert

* Peter Xu (pet...@redhat.com) wrote:
> Don't take the bitmap mutex when sending pages, or when being throttled by
> migration_rate_limit() (which is a bit tricky to call it here in ram code,
> but seems still helpful).
> 
> It prepares for the possibility of concurrently sending pages in >1 threads
> using the function ram_save_host_page() because all threads may need the
> bitmap_mutex to operate on bitmaps, so that either sendmsg() or any kind of
> qemu_sem_wait() blocking for one thread will not block the other from
> progressing.
> 
> Signed-off-by: Peter Xu 

Reviewed-by: Dr. David Alan Gilbert 

although a comment above the declaration of ram_save_host_page saying
it can drop the lock would be very good.

Dave


> ---
>  migration/ram.c | 41 ++---
>  1 file changed, 30 insertions(+), 11 deletions(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index b9ac2d6921..578ad8d70a 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2462,6 +2462,7 @@ static void postcopy_preempt_reset_channel(RAMState *rs)
>   */
>  static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
>  {
> +bool page_dirty, preempt_active = postcopy_preempt_active();
>  int tmppages, pages = 0;
>  size_t pagesize_bits =
>  qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
> @@ -2485,22 +2486,40 @@ static int ram_save_host_page(RAMState *rs, 
> PageSearchStatus *pss)
>  break;
>  }
>  
> -/* Check the pages is dirty and if it is send it */
> -if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
> -tmppages = ram_save_target_page(rs, pss);
> -if (tmppages < 0) {
> -return tmppages;
> -}
> +page_dirty = migration_bitmap_clear_dirty(rs, pss->block, pss->page);
>  
> -pages += tmppages;
> +/* Check the pages is dirty and if it is send it */
> +if (page_dirty) {
>  /*
> - * Allow rate limiting to happen in the middle of huge pages if
> - * something is sent in the current iteration.
> + * Properly yield the lock only in postcopy preempt mode
> + * because both migration thread and rp-return thread can
> + * operate on the bitmaps.
>   */
> -if (pagesize_bits > 1 && tmppages > 0) {
> -migration_rate_limit();
> +if (preempt_active) {
> +qemu_mutex_unlock(>bitmap_mutex);
> +}
> +tmppages = ram_save_target_page(rs, pss);
> +if (tmppages >= 0) {
> +pages += tmppages;
> +/*
> + * Allow rate limiting to happen in the middle of huge pages 
> if
> + * something is sent in the current iteration.
> + */
> +if (pagesize_bits > 1 && tmppages > 0) {
> +migration_rate_limit();
> +}
>  }
> +if (preempt_active) {
> +qemu_mutex_lock(>bitmap_mutex);
> +}
> +} else {
> +tmppages = 0;
>  }
> +
> +if (tmppages < 0) {
> +return tmppages;
> +}
> +
>  pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
>  } while ((pss->page < hostpage_boundary) &&
>   offset_in_ramblock(pss->block,
> -- 
> 2.37.3
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

[PATCH v10 7/9] s390x/cpu topology: add max_threads machine class attribute

2022-10-12 Thread Pierre Morel

The S390 CPU topology accepts the smp.threads argument while
in reality it does not effectively allow multithreading.

Let's keep this behavior for machines older than 7.3 and
refuse to use threads in newer machines until multithreading
is really proposed to the guest by the machine.

Signed-off-by: Pierre Morel 
---
 include/hw/s390x/s390-virtio-ccw.h |  1 +
 hw/s390x/s390-virtio-ccw.c | 10 ++
 2 files changed, 11 insertions(+)

diff --git a/include/hw/s390x/s390-virtio-ccw.h 
b/include/hw/s390x/s390-virtio-ccw.h
index 6c4b4645fc..319dfac1bb 100644
--- a/include/hw/s390x/s390-virtio-ccw.h
+++ b/include/hw/s390x/s390-virtio-ccw.h
@@ -48,6 +48,7 @@ struct S390CcwMachineClass {
 bool css_migration_enabled;
 bool hpage_1m_allowed;
 bool topology_allowed;
+int max_threads;
 };
 
 /* runtime-instrumentation allowed by the machine */
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 3a13fad4df..d6ce31d168 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -85,8 +85,15 @@ out:
 static void s390_init_cpus(MachineState *machine)
 {
 MachineClass *mc = MACHINE_GET_CLASS(machine);
+S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc);
 int i;
 
+if (machine->smp.threads > s390mc->max_threads) {
+error_report("S390 does not support more than %d threads.",
+ s390mc->max_threads);
+exit(1);
+}
+
 /* initialize possible_cpus */
 mc->possible_cpu_arch_ids(machine);
 
@@ -617,6 +624,7 @@ static void ccw_machine_class_init(ObjectClass *oc, void 
*data)
 s390mc->css_migration_enabled = true;
 s390mc->hpage_1m_allowed = true;
 s390mc->topology_allowed = true;
+s390mc->max_threads = 1;
 mc->init = ccw_init;
 mc->reset = s390_machine_reset;
 mc->block_default_type = IF_VIRTIO;
@@ -887,12 +895,14 @@ static void ccw_machine_7_2_class_options(MachineClass 
*mc)
 S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc);
 static GlobalProperty compat[] = {
 { TYPE_S390_CPU_TOPOLOGY, "topology-allowed", "off", },
+{ TYPE_S390_CPU_TOPOLOGY, "max_threads", "off", },
 };
 
 ccw_machine_7_3_class_options(mc);
 compat_props_add(mc->compat_props, hw_compat_7_2, hw_compat_7_2_len);
 compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
 s390mc->topology_allowed = false;
+s390mc->max_threads = S390_MAX_CPUS;
 }
 DEFINE_CCW_MACHINE(7_2, "7.2", false);
 
-- 
2.31.1

[PATCH v10 3/9] s390x/cpu_topology: resetting the Topology-Change-Report

2022-10-12 Thread Pierre Morel

During a subsystem reset the Topology-Change-Report is cleared
by the machine.
Let's ask KVM to clear the Modified Topology Change Report (MTCR)
 bit of the SCA in the case of a subsystem reset.

Signed-off-by: Pierre Morel 
Reviewed-by: Nico Boehr 
Reviewed-by: Janis Schoetterl-Glausch 
---
 target/s390x/cpu.h   |  1 +
 target/s390x/kvm/kvm_s390x.h |  1 +
 hw/s390x/cpu-topology.c  | 12 
 hw/s390x/s390-virtio-ccw.c   |  1 +
 target/s390x/cpu-sysemu.c|  7 +++
 target/s390x/kvm/kvm.c   | 23 +++
 6 files changed, 45 insertions(+)

diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index d604aa9c78..9b35795ac8 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -825,6 +825,7 @@ void s390_enable_css_support(S390CPU *cpu);
 void s390_do_cpu_set_diag318(CPUState *cs, run_on_cpu_data arg);
 int s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch_id,
 int vq, bool assign);
+void s390_cpu_topology_reset(void);
 #ifndef CONFIG_USER_ONLY
 unsigned int s390_cpu_set_state(uint8_t cpu_state, S390CPU *cpu);
 #else
diff --git a/target/s390x/kvm/kvm_s390x.h b/target/s390x/kvm/kvm_s390x.h
index aaae8570de..a13c8fb9a3 100644
--- a/target/s390x/kvm/kvm_s390x.h
+++ b/target/s390x/kvm/kvm_s390x.h
@@ -46,5 +46,6 @@ void kvm_s390_crypto_reset(void);
 void kvm_s390_restart_interrupt(S390CPU *cpu);
 void kvm_s390_stop_interrupt(S390CPU *cpu);
 void kvm_s390_set_diag318(CPUState *cs, uint64_t diag318_info);
+int kvm_s390_topology_set_mtcr(uint64_t attr);
 
 #endif /* KVM_S390X_H */
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index c73cebfe6f..9f202621d0 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -107,6 +107,17 @@ static void s390_topology_realize(DeviceState *dev, Error 
**errp)
 qemu_mutex_init(>topo_mutex);
 }
 
+/**
+ * s390_topology_reset:
+ * @dev: the device
+ *
+ * Calls the sysemu topology reset
+ */
+static void s390_topology_reset(DeviceState *dev)
+{
+s390_cpu_topology_reset();
+}
+
 /**
  * topology_class_init:
  * @oc: Object class
@@ -120,6 +131,7 @@ static void topology_class_init(ObjectClass *oc, void *data)
 
 dc->realize = s390_topology_realize;
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+dc->reset = s390_topology_reset;
 }
 
 static const TypeInfo cpu_topology_info = {
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index aa99a62e42..362378454a 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -113,6 +113,7 @@ static const char *const reset_dev_types[] = {
 "s390-flic",
 "diag288",
 TYPE_S390_PCI_HOST_BRIDGE,
+TYPE_S390_CPU_TOPOLOGY,
 };
 
 static void subsystem_reset(void)
diff --git a/target/s390x/cpu-sysemu.c b/target/s390x/cpu-sysemu.c
index 948e4bd3e0..707c0b658c 100644
--- a/target/s390x/cpu-sysemu.c
+++ b/target/s390x/cpu-sysemu.c
@@ -306,3 +306,10 @@ void s390_do_cpu_set_diag318(CPUState *cs, run_on_cpu_data 
arg)
 kvm_s390_set_diag318(cs, arg.host_ulong);
 }
 }
+
+void s390_cpu_topology_reset(void)
+{
+if (kvm_enabled()) {
+kvm_s390_topology_set_mtcr(0);
+}
+}
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index f96630440b..9c994d27d5 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -2585,3 +2585,26 @@ int kvm_s390_get_zpci_op(void)
 {
 return cap_zpci_op;
 }
+
+int kvm_s390_topology_set_mtcr(uint64_t attr)
+{
+struct kvm_device_attr attribute = {
+.group = KVM_S390_VM_CPU_TOPOLOGY,
+.attr  = attr,
+};
+int ret;
+
+if (!s390_has_feat(S390_FEAT_CONFIGURATION_TOPOLOGY)) {
+return -EFAULT;
+}
+if (!kvm_vm_check_attr(kvm_state, KVM_S390_VM_CPU_TOPOLOGY, attr)) {
+return -ENOENT;
+}
+
+ret = kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, );
+if (ret) {
+error_report("Failed to set cpu topology attribute %lu: %s",
+ attr, strerror(-ret));
+}
+return ret;
+}
-- 
2.31.1

[PATCH v10 5/9] target/s390x: interception of PTF instruction

2022-10-12 Thread Pierre Morel

When the host supports the CPU topology facility, the PTF
instruction with function code 2 is interpreted by the SIE,
provided that the userland hypervisor activates the interpretation
by using the KVM_CAP_S390_CPU_TOPOLOGY KVM extension.

The PTF instructions with function code 0 and 1 are intercepted
and must be emulated by the userland hypervisor.

Signed-off-by: Pierre Morel 
Reviewed-by: Janis Schoetterl-Glausch 
---
 include/hw/s390x/s390-virtio-ccw.h |  6 
 hw/s390x/cpu-topology.c| 52 ++
 target/s390x/kvm/kvm.c | 11 +++
 3 files changed, 69 insertions(+)

diff --git a/include/hw/s390x/s390-virtio-ccw.h 
b/include/hw/s390x/s390-virtio-ccw.h
index 8a0090a071..9e7a0d75bc 100644
--- a/include/hw/s390x/s390-virtio-ccw.h
+++ b/include/hw/s390x/s390-virtio-ccw.h
@@ -31,6 +31,12 @@ struct S390CcwMachineState {
 uint8_t loadparm[8];
 };
 
+#define S390_PTF_REASON_NONE (0x00 << 8)
+#define S390_PTF_REASON_DONE (0x01 << 8)
+#define S390_PTF_REASON_BUSY (0x02 << 8)
+#define S390_TOPO_FC_MASK 0xffUL
+void s390_handle_ptf(S390CPU *cpu, uint8_t r1, uintptr_t ra);
+
 struct S390CcwMachineClass {
 /*< private >*/
 MachineClass parent_class;
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index 349f0ad89d..2ad516a97d 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -20,6 +20,58 @@
 #include "hw/s390x/s390-virtio-ccw.h"
 #include "hw/s390x/cpu-topology.h"
 #include "migration/vmstate.h"
+#include "target/s390x/cpu.h"
+#include "hw/s390x/s390-virtio-ccw.h"
+
+/*
+ * s390_handle_ptf:
+ *
+ * @register 1: contains the function code
+ *
+ * Function codes 0 and 1 handle the CPU polarization.
+ * We assume a horizontal topology, the only one currently supported
+ * by Linux. Consequently, for function code 0 (request horizontal
+ * polarization) we answer that it is already the current polarization,
+ * and we reject vertical polarization requests without explanation.
+ *
+ * Function code 2 is handling topology changes and is interpreted
+ * by the SIE.
+ */
+void s390_handle_ptf(S390CPU *cpu, uint8_t r1, uintptr_t ra)
+{
+CPUS390XState *env = >env;
+uint64_t reg = env->regs[r1];
+uint8_t fc = reg & S390_TOPO_FC_MASK;
+
+if (!s390_has_feat(S390_FEAT_CONFIGURATION_TOPOLOGY)) {
+s390_program_interrupt(env, PGM_OPERATION, ra);
+return;
+}
+
+if (env->psw.mask & PSW_MASK_PSTATE) {
+s390_program_interrupt(env, PGM_PRIVILEGED, ra);
+return;
+}
+
+if (reg & ~S390_TOPO_FC_MASK) {
+s390_program_interrupt(env, PGM_SPECIFICATION, ra);
+return;
+}
+
+switch (fc) {
+case 0:/* Horizontal polarization is already set */
+env->regs[r1] |= S390_PTF_REASON_DONE;
+setcc(cpu, 2);
+break;
+case 1:/* Vertical polarization is not supported */
+env->regs[r1] |= S390_PTF_REASON_NONE;
+setcc(cpu, 2);
+break;
+default:
+/* Note that fc == 2 is interpreted by the SIE */
+s390_program_interrupt(env, PGM_SPECIFICATION, ra);
+}
+}
 
 S390Topology *s390_get_topology(void)
 {
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 9c994d27d5..49a99931a4 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -97,6 +97,7 @@
 
 #define PRIV_B9_EQBS0x9c
 #define PRIV_B9_CLP 0xa0
+#define PRIV_B9_PTF 0xa2
 #define PRIV_B9_PCISTG  0xd0
 #define PRIV_B9_PCILG   0xd2
 #define PRIV_B9_RPCIT   0xd3
@@ -1463,6 +1464,13 @@ static int kvm_mpcifc_service_call(S390CPU *cpu, struct 
kvm_run *run)
 }
 }
 
+static void kvm_handle_ptf(S390CPU *cpu, struct kvm_run *run)
+{
+uint8_t r1 = (run->s390_sieic.ipb >> 20) & 0x0f;
+
+s390_handle_ptf(cpu, r1, RA_IGNORED);
+}
+
 static int handle_b9(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1)
 {
 int r = 0;
@@ -1480,6 +1488,9 @@ static int handle_b9(S390CPU *cpu, struct kvm_run *run, 
uint8_t ipa1)
 case PRIV_B9_RPCIT:
 r = kvm_rpcit_service_call(cpu, run);
 break;
+case PRIV_B9_PTF:
+kvm_handle_ptf(cpu, run);
+break;
 case PRIV_B9_EQBS:
 /* just inject exception */
 r = -1;
-- 
2.31.1

[PATCH v10 8/9] s390x/cpu_topology: activating CPU topology

2022-10-12 Thread Pierre Morel

The KVM capability, KVM_CAP_S390_CPU_TOPOLOGY is used to
activate the S390_FEAT_CONFIGURATION_TOPOLOGY feature and
the topology facility for the guest in the case the topology
is available in QEMU and in KVM.

Signed-off-by: Pierre Morel 
---
 target/s390x/kvm/kvm.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 49a99931a4..1c4e24c9a7 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -2464,6 +2464,22 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, 
Error **errp)
 set_bit(S390_FEAT_UNPACK, model->features);
 }
 
+/*
+ * If we have the CPU Topology allowed in QEMU and
+ * implemented in KVM, activate the CPU TOPOLOGY feature.
+ */
+if (s390_has_topology()) {
+if (!kvm_check_extension(kvm_state, KVM_CAP_S390_CPU_TOPOLOGY)) {
+error_setg(errp, "KVM: S390 topology not present in kernel");
+return;
+}
+if (kvm_vm_enable_cap(kvm_state, KVM_CAP_S390_CPU_TOPOLOGY, 0) < 0) {
+error_setg(errp, "KVM: Error enabling KVM_CAP_S390_CPU_TOPOLOGY");
+return;
+}
+set_bit(S390_FEAT_CONFIGURATION_TOPOLOGY, model->features);
+}
+
 /* We emulate a zPCI bus and AEN, therefore we don't need HW support */
 set_bit(S390_FEAT_ZPCI, model->features);
 set_bit(S390_FEAT_ADAPTER_EVENT_NOTIFICATION, model->features);
-- 
2.31.1

[PATCH v10 4/9] s390x/cpu_topology: CPU topology migration

2022-10-12 Thread Pierre Morel

The migration can only take place if the source and the destination
of the migration either both use or both do not use the CPU topology
facility.

We indicate a change in topology during migration postload in case
the topology changed between source and destination.

Signed-off-by: Pierre Morel 
---
 include/hw/s390x/cpu-topology.h |  1 +
 target/s390x/cpu.h  |  1 +
 hw/s390x/cpu-topology.c | 79 +
 target/s390x/cpu-sysemu.c   |  8 
 4 files changed, 89 insertions(+)

diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
index 61c11db017..35a8a981ec 100644
--- a/include/hw/s390x/cpu-topology.h
+++ b/include/hw/s390x/cpu-topology.h
@@ -28,6 +28,7 @@ typedef struct S390TopoTLE {
 struct S390Topology {
 SysBusDevice parent_obj;
 int cpus;
+bool topology_needed;
 S390TopoContainer *socket;
 S390TopoTLE *tle;
 MachineState *ms;
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 9b35795ac8..8495bfafde 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -826,6 +826,7 @@ void s390_do_cpu_set_diag318(CPUState *cs, run_on_cpu_data 
arg);
 int s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch_id,
 int vq, bool assign);
 void s390_cpu_topology_reset(void);
+int s390_cpu_topology_mtcr_set(void);
 #ifndef CONFIG_USER_ONLY
 unsigned int s390_cpu_set_state(uint8_t cpu_state, S390CPU *cpu);
 #else
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index 9f202621d0..349f0ad89d 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -19,6 +19,7 @@
 #include "target/s390x/cpu.h"
 #include "hw/s390x/s390-virtio-ccw.h"
 #include "hw/s390x/cpu-topology.h"
+#include "migration/vmstate.h"
 
 S390Topology *s390_get_topology(void)
 {
@@ -118,6 +119,83 @@ static void s390_topology_reset(DeviceState *dev)
 s390_cpu_topology_reset();
 }
 
+/**
+ * cpu_topology_postload
+ * @opaque: a pointer to the S390Topology
+ * @version_id: version identifier
+ *
+ * We check that the topology is used or is not used
+ * on both sides identically.
+ *
+ * If the topology is in use we set the Modified Topology Change Report
+ * on the destination host.
+ */
+static int cpu_topology_postload(void *opaque, int version_id)
+{
+S390Topology *topo = opaque;
+int ret;
+
+if (topo->topology_needed != s390_has_topology()) {
+if (topo->topology_needed) {
+error_report("Topology facility is needed in destination");
+} else {
+error_report("Topology facility can not be used in destination");
+}
+return -EINVAL;
+}
+
+/* We do not support CPU Topology, all is good */
+if (!s390_has_topology()) {
+return 0;
+}
+
+/* We support CPU Topology, set the MTCR unconditionally */
+ret = s390_cpu_topology_mtcr_set();
+if (ret) {
+error_report("Failed to set MTCR: %s", strerror(-ret));
+}
+return ret;
+}
+
+/**
+ * cpu_topology_presave:
+ * @opaque: The pointer to the S390Topology
+ *
+ * Save the usage of the CPU Topology in the VM State.
+ */
+static int cpu_topology_presave(void *opaque)
+{
+S390Topology *topo = opaque;
+
+topo->topology_needed = s390_has_topology();
+return 0;
+}
+
+/**
+ * cpu_topology_needed:
+ * @opaque: The pointer to the S390Topology
+ *
+ * We always need to know if source and destination use the topology.
+ */
+static bool cpu_topology_needed(void *opaque)
+{
+return true;
+}
+
+
+const VMStateDescription vmstate_cpu_topology = {
+.name = "cpu_topology",
+.version_id = 1,
+.post_load = cpu_topology_postload,
+.pre_save = cpu_topology_presave,
+.minimum_version_id = 1,
+.needed = cpu_topology_needed,
+.fields = (VMStateField[]) {
+VMSTATE_BOOL(topology_needed, S390Topology),
+VMSTATE_END_OF_LIST()
+}
+};
+
 /**
  * topology_class_init:
  * @oc: Object class
@@ -132,6 +210,7 @@ static void topology_class_init(ObjectClass *oc, void *data)
 dc->realize = s390_topology_realize;
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 dc->reset = s390_topology_reset;
+dc->vmsd = _cpu_topology;
 }
 
 static const TypeInfo cpu_topology_info = {
diff --git a/target/s390x/cpu-sysemu.c b/target/s390x/cpu-sysemu.c
index 707c0b658c..78cb11c0f8 100644
--- a/target/s390x/cpu-sysemu.c
+++ b/target/s390x/cpu-sysemu.c
@@ -313,3 +313,11 @@ void s390_cpu_topology_reset(void)
 kvm_s390_topology_set_mtcr(0);
 }
 }
+
+int s390_cpu_topology_mtcr_set(void)
+{
+if (kvm_enabled()) {
+return kvm_s390_topology_set_mtcr(1);
+}
+return -ENOENT;
+}
-- 
2.31.1

[PATCH v10 2/9] s390x/cpu topology: reporting the CPU topology to the guest

2022-10-12 Thread Pierre Morel

The guest can use the STSI instruction to get a buffer filled
with the CPU topology description.

Let us implement the STSI instruction for the basic CPU topology
level, level 2.

Signed-off-by: Pierre Morel 
---
 include/hw/s390x/cpu-topology.h |   3 +
 target/s390x/cpu.h  |  48 ++
 hw/s390x/cpu-topology.c |   8 ++-
 target/s390x/cpu_topology.c | 109 
 target/s390x/kvm/kvm.c  |   6 +-
 target/s390x/meson.build|   1 +
 6 files changed, 172 insertions(+), 3 deletions(-)
 create mode 100644 target/s390x/cpu_topology.c

diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
index 66c171d0bc..61c11db017 100644
--- a/include/hw/s390x/cpu-topology.h
+++ b/include/hw/s390x/cpu-topology.h
@@ -13,6 +13,8 @@
 #include "hw/qdev-core.h"
 #include "qom/object.h"
 
+#define S390_TOPOLOGY_POLARITY_H  0x00
+
 typedef struct S390TopoContainer {
 int active_count;
 } S390TopoContainer;
@@ -29,6 +31,7 @@ struct S390Topology {
 S390TopoContainer *socket;
 S390TopoTLE *tle;
 MachineState *ms;
+QemuMutex topo_mutex;
 };
 
 #define TYPE_S390_CPU_TOPOLOGY "s390-topology"
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 7d6d01325b..d604aa9c78 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -565,6 +565,52 @@ typedef union SysIB {
 } SysIB;
 QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
 
+/* CPU type Topology List Entry */
+typedef struct SysIBTl_cpu {
+uint8_t nl;
+uint8_t reserved0[3];
+uint8_t reserved1:5;
+uint8_t dedicated:1;
+uint8_t polarity:2;
+uint8_t type;
+uint16_t origin;
+uint64_t mask;
+} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_cpu;
+QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
+
+/* Container type Topology List Entry */
+typedef struct SysIBTl_container {
+uint8_t nl;
+uint8_t reserved[6];
+uint8_t id;
+} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_container;
+QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
+
+#define TOPOLOGY_NR_MAG  6
+#define TOPOLOGY_NR_MAG6 0
+#define TOPOLOGY_NR_MAG5 1
+#define TOPOLOGY_NR_MAG4 2
+#define TOPOLOGY_NR_MAG3 3
+#define TOPOLOGY_NR_MAG2 4
+#define TOPOLOGY_NR_MAG1 5
+/* Configuration topology */
+typedef struct SysIB_151x {
+uint8_t  reserved0[2];
+uint16_t length;
+uint8_t  mag[TOPOLOGY_NR_MAG];
+uint8_t  reserved1;
+uint8_t  mnest;
+uint32_t reserved2;
+char tle[0];
+} QEMU_PACKED QEMU_ALIGNED(8) SysIB_151x;
+QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
+
+/* A SYSIB reaches its maximum size when every CPU is alone in a container */
+#define S390_TOPOLOGY_SYSIB_SIZE (sizeof(SysIB_151x) + 
\
+  S390_MAX_CPUS * (sizeof(SysIBTl_container) + 
\
+   sizeof(SysIBTl_cpu)))
+
+
 /* MMU defines */
 #define ASCE_ORIGIN   (~0xfffULL) /* segment table origin 
*/
 #define ASCE_SUBSPACE 0x200   /* subspace group control   
*/
@@ -843,4 +889,6 @@ S390CPU *s390_cpu_addr2state(uint16_t cpu_addr);
 
 #include "exec/cpu-all.h"
 
+void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar);
+
 #endif
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index 42b22a1831..c73cebfe6f 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -54,8 +54,6 @@ void s390_topology_new_cpu(int core_id)
 return;
 }
 
-socket_id = core_id / topo->cpus;
-
 /*
  * At the core level, each CPU is represented by a bit in a 64bit
  * unsigned long which represent the presence of a CPU.
@@ -76,8 +74,13 @@ void s390_topology_new_cpu(int core_id)
 bit %= 64;
 bit = 63 - bit;
 
+qemu_mutex_lock(>topo_mutex);
+
+socket_id = core_id / topo->cpus;
 topo->socket[socket_id].active_count++;
 set_bit(bit, >tle[socket_id].mask[origin]);
+
+qemu_mutex_unlock(>topo_mutex);
 }
 
 /**
@@ -101,6 +104,7 @@ static void s390_topology_realize(DeviceState *dev, Error 
**errp)
 topo->tle = g_new0(S390TopoTLE, ms->smp.max_cpus);
 
 topo->ms = ms;
+qemu_mutex_init(>topo_mutex);
 }
 
 /**
diff --git a/target/s390x/cpu_topology.c b/target/s390x/cpu_topology.c
new file mode 100644
index 00..df86a98f23
--- /dev/null
+++ b/target/s390x/cpu_topology.c
@@ -0,0 +1,109 @@
+/*
+ * QEMU S390x CPU Topology
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Pierre Morel 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "hw/s390x/pv.h"
+#include "hw/sysbus.h"
+#include "hw/s390x/cpu-topology.h"
+#include "hw/s390x/sclp.h"
+
+#define S390_TOPOLOGY_MAX_STSI_SIZE (S390_MAX_CPUS *  \
+ (sizeof(SysIB_151x) +\
+

[PATCH v10 6/9] s390x/cpu topology: add topology-disable machine property

2022-10-12 Thread Pierre Morel

S390 CPU topology is only allowed for s390-virtio-ccw-7.3 and
newer S390 machines.
We keep the possibility to disable the topology on these newer
machines with the property topology-disable.

Signed-off-by: Pierre Morel 
---
 include/hw/boards.h|  3 ++
 include/hw/s390x/cpu-topology.h| 18 +-
 include/hw/s390x/s390-virtio-ccw.h |  2 ++
 hw/core/machine.c  |  5 +++
 hw/s390x/s390-virtio-ccw.c | 53 +-
 util/qemu-config.c |  4 +++
 qemu-options.hx|  6 +++-
 7 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/include/hw/boards.h b/include/hw/boards.h
index 311ed17e18..67147c47bf 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -379,6 +379,9 @@ struct MachineState {
 } \
 type_init(machine_initfn##_register_types)
 
+extern GlobalProperty hw_compat_7_2[];
+extern const size_t hw_compat_7_2_len;
+
 extern GlobalProperty hw_compat_7_1[];
 extern const size_t hw_compat_7_1_len;
 
diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
index 35a8a981ec..747c9ab4c6 100644
--- a/include/hw/s390x/cpu-topology.h
+++ b/include/hw/s390x/cpu-topology.h
@@ -12,6 +12,8 @@
 
 #include "hw/qdev-core.h"
 #include "qom/object.h"
+#include "cpu.h"
+#include "hw/s390x/s390-virtio-ccw.h"
 
 #define S390_TOPOLOGY_POLARITY_H  0x00
 
@@ -43,7 +45,21 @@ void s390_topology_new_cpu(int core_id);
 
 static inline bool s390_has_topology(void)
 {
-return false;
+static S390CcwMachineState *ccw;
+Object *obj;
+
+if (ccw) {
+return !ccw->topology_disable;
+}
+
+/* we have to bail out for the "none" machine */
+obj = object_dynamic_cast(qdev_get_machine(),
+  TYPE_S390_CCW_MACHINE);
+if (!obj) {
+return false;
+}
+ccw = S390_CCW_MACHINE(obj);
+return !ccw->topology_disable;
 }
 
 #endif
diff --git a/include/hw/s390x/s390-virtio-ccw.h 
b/include/hw/s390x/s390-virtio-ccw.h
index 9e7a0d75bc..6c4b4645fc 100644
--- a/include/hw/s390x/s390-virtio-ccw.h
+++ b/include/hw/s390x/s390-virtio-ccw.h
@@ -28,6 +28,7 @@ struct S390CcwMachineState {
 bool dea_key_wrap;
 bool pv;
 bool zpcii_disable;
+bool topology_disable;
 uint8_t loadparm[8];
 };
 
@@ -46,6 +47,7 @@ struct S390CcwMachineClass {
 bool cpu_model_allowed;
 bool css_migration_enabled;
 bool hpage_1m_allowed;
+bool topology_allowed;
 };
 
 /* runtime-instrumentation allowed by the machine */
diff --git a/hw/core/machine.c b/hw/core/machine.c
index aa520e74a8..93c497655e 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -40,6 +40,11 @@
 #include "hw/virtio/virtio-pci.h"
 #include "qom/object_interfaces.h"
 
+GlobalProperty hw_compat_7_2[] = {
+{ "s390-topology", "topology-disable", "true" },
+};
+const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2);
+
 GlobalProperty hw_compat_7_1[] = {};
 const size_t hw_compat_7_1_len = G_N_ELEMENTS(hw_compat_7_1);
 
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 362378454a..3a13fad4df 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -616,6 +616,7 @@ static void ccw_machine_class_init(ObjectClass *oc, void 
*data)
 s390mc->cpu_model_allowed = true;
 s390mc->css_migration_enabled = true;
 s390mc->hpage_1m_allowed = true;
+s390mc->topology_allowed = true;
 mc->init = ccw_init;
 mc->reset = s390_machine_reset;
 mc->block_default_type = IF_VIRTIO;
@@ -726,6 +727,27 @@ bool hpage_1m_allowed(void)
 return get_machine_class()->hpage_1m_allowed;
 }
 
+static inline bool machine_get_topology_disable(Object *obj, Error **errp)
+{
+S390CcwMachineState *ms = S390_CCW_MACHINE(obj);
+
+return ms->topology_disable;
+}
+
+static inline void machine_set_topology_disable(Object *obj, bool value,
+Error **errp)
+{
+S390CcwMachineState *ms = S390_CCW_MACHINE(obj);
+
+if (!get_machine_class()->topology_allowed) {
+error_setg(errp, "Property topology-disable not available on machine 
%s",
+   get_machine_class()->parent_class.name);
+return;
+}
+
+ms->topology_disable = value;
+}
+
 static char *machine_get_loadparm(Object *obj, Error **errp)
 {
 S390CcwMachineState *ms = S390_CCW_MACHINE(obj);
@@ -784,6 +806,13 @@ static inline void s390_machine_initfn(Object *obj)
 object_property_set_description(obj, "zpcii-disable",
 "disable zPCI interpretation facilties");
 object_property_set_bool(obj, "zpcii-disable", false, NULL);
+
+object_property_add_bool(obj, "topology-disable",
+ machine_get_topology_disable,
+ machine_set_topology_disable);
+object_property_set_description(obj, "topology-disable",
+"disable CPU topology");
+object_property_set_bool(obj, "topology-disable", false,

Re: [PATCH v2 2/2] error handling: Use RETRY_ON_EINTR() macro where applicable

2022-10-12 Thread Nikita Ivanov

Hi!
Excuse me, my fault. I overlooked the TFR occurrences in the second patch.
I will correct it.

ср, 12 окт. 2022 г., 18:43 Christian Schoenebeck :

> On Mittwoch, 12. Oktober 2022 17:17:46 CEST Bin Meng wrote:
> > Hi,
> >
> > On Wed, Oct 12, 2022 at 8:32 PM Nikita Ivanov 
> wrote:
> > > There is a defined RETRY_ON_EINTR() macro in qemu/osdep.h which
> > > handles the same while loop.
> > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/415
> > >
> > > Signed-off-by: Nikita Ivanov 
> > > ---
> > >
> > >  block/file-posix.c| 37 -
> > >  chardev/char-pty.c|  4 +---
> > >  hw/9pfs/9p-local.c|  8 ++--
> > >  net/l2tpv3.c  | 17 +
> > >  net/socket.c  | 16 +++-
> > >  net/tap.c | 12 
> > >  qga/commands-posix.c  |  4 +---
> > >  semihosting/syscalls.c|  4 +---
> > >  tests/qtest/libqtest.c| 14 ++
> > >  tests/vhost-user-bridge.c |  4 +---
> > >  util/main-loop.c  |  4 +---
> > >  util/osdep.c  |  4 +---
> > >  util/vfio-helpers.c   | 12 ++--
> > >  13 files changed, 52 insertions(+), 88 deletions(-)
> >
> > This patch has to be squashed into patch 1 for bisectability, as TFR
> > is already removed in patch 1.
>
> They are intentionally separated: 1st patch replaces occurrences of TFR,
> whereas 2nd patch introduces use of macro at locations where not used yet.
>
> Nikita, could you please move those 2 hunks that still had TFR()
> occurrence to
> patch 1?
>
> And please use git's --thread option next time, so that individual patch
> emails are linked to cover letter email (which adds appropriate
> `References:`
> and `In-Reply-To:` email headers).
>
> Best regards,
> Christian Schoenebeck
>
>
>

[PATCH v10 0/9] s390x: CPU Topology

2022-10-12 Thread Pierre Morel

Hi,

The implementation of the CPU Topology in QEMU has been drastically
modified since the last patch series and the number of LOCs has been
greatly reduced.

1) Unnecessary objects have been removed, only a single S390Topology
   object is created to support migration and reset.

2) The introduction of drawers and books is deferred to a later version.

3) A new property, topology-disable, is added for new machines, for test
   purposes and for migration, in either direction, between a host
   without facility 11 and a host with facility 11.

Documentation has also been added to the series.


To use the QEMU patches, you will need Linux V6-rc1 or newer,
or use the following Linux mainline patches:

f5ecfee94493 2022-07-20 KVM: s390: resetting the Topology-Change-Report
24fe0195bc19 2022-07-20 KVM: s390: guest support for topology function 
0130337ec45b 2022-07-20 KVM: s390: Cleanup ipte lock access and SIIF fac.. 

Currently this code is for KVM only. I do not know whether a TCG patch
would be of interest; if it is, it will be done in another series.

To have a better understanding of the S390x CPU Topology and its
implementation in QEMU you can have a look at the documentation in the
last patch of this series.

The admin will want to match the host and the guest topology, taking
into account that the guest does not recognize multithreading.
Consequently, two vCPU assigned to threads of the same real CPU should
preferably be assigned to the same socket of the guest machine.

Regards,
Pierre

Pierre Morel (9):
  s390x/cpu topology: core_id sets s390x CPU topology
  s390x/cpu topology: reporting the CPU topology to the guest
  s390x/cpu_topology: resetting the Topology-Change-Report
  s390x/cpu_topology: CPU topology migration
  target/s390x: interception of PTF instruction
  s390x/cpu topology: add topology-disable machine property
  s390x/cpu topology: add max_threads machine class attribute
  s390x/cpu_topology: activating CPU topology
  docs/s390x: document s390x cpu topology

 docs/system/s390x/cpu_topology.rst |  80 +
 include/hw/boards.h|   3 +
 include/hw/s390x/cpu-topology.h|  65 +++
 include/hw/s390x/s390-virtio-ccw.h |   9 +
 target/s390x/cpu.h |  50 ++
 target/s390x/kvm/kvm_s390x.h   |   1 +
 hw/core/machine.c  |   5 +
 hw/s390x/cpu-topology.c| 279 +
 hw/s390x/s390-virtio-ccw.c |  85 -
 target/s390x/cpu-sysemu.c  |  15 ++
 target/s390x/cpu_topology.c| 109 +++
 target/s390x/kvm/kvm.c |  56 +-
 util/qemu-config.c |   4 +
 hw/s390x/meson.build   |   1 +
 qemu-options.hx|   6 +-
 target/s390x/meson.build   |   1 +
 16 files changed, 766 insertions(+), 3 deletions(-)
 create mode 100644 docs/system/s390x/cpu_topology.rst
 create mode 100644 include/hw/s390x/cpu-topology.h
 create mode 100644 hw/s390x/cpu-topology.c
 create mode 100644 target/s390x/cpu_topology.c

-- 
2.31.1

Changelog:

- since v9

- remove books and drawers

- remove thread denying and replace with a merge
  of cores * threads to specify the CPUs available
  to the guest

- add a class option to avoid topology on older
  machines
  (Cedric)

- Allocate a SYSIB buffer of the maximal length to
  avoid overflow.
  (Nico, Janis)

- suppress redundancy of smp parameters in topology
  and use directly the machine smp structure

- Early check for topology support
  (Cedric)

- since v8

- Linux patches are now mainline

- simplification of the implementation
  (Janis)

- Migration, new machine definition
  (Thomas)

- Documentation

- since v7

- Coherence with the Linux patch series changes for MTCR get
  (Pierre)

- check return values during new CPU creation
  (Thomas)

- Improving coding style and argument usage
  (Thomas)

- since v6

- Changes on smp args in qemu-options
  (Daniel)
  
- changed comments in machine.json
  (Daniel)
 
- Added reset
  (Janosch)

- since v5

- rebasing on newer QEMU version

- reworked most lines above 80 characters.

- since v4

- Added drawer and books to topology

- Added numa topology

- Added documentation

- since v3

- Added migration
  (Thomas)

- Separated STSI instruction from KVM to prepare TCG
  (Thomas)

- Take care of endianness to prepare TCG
  (Thomas)

- Added comments on STSI CPU container and PTF instruction
  (Thomas)

- Moved enabling the instructions as the last patch
  (Thomas)

[PATCH v10 1/9] s390x/cpu topology: core_id sets s390x CPU topology

2022-10-12 Thread Pierre Morel

In the S390x CPU topology the core_id specifies the CPU address
and the position of the core within the topology.

Let's build the topology based on the core_id.
s390x/cpu topology: core_id sets s390x CPU topology

In the S390x CPU topology the core_id specifies the CPU address
and the position of the cpu within the topology.

Let's build the topology based on the core_id.

Signed-off-by: Pierre Morel 
---
 include/hw/s390x/cpu-topology.h |  45 +++
 hw/s390x/cpu-topology.c | 132 
 hw/s390x/s390-virtio-ccw.c  |  21 +
 hw/s390x/meson.build|   1 +
 4 files changed, 199 insertions(+)
 create mode 100644 include/hw/s390x/cpu-topology.h
 create mode 100644 hw/s390x/cpu-topology.c

diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
new file mode 100644
index 00..66c171d0bc
--- /dev/null
+++ b/include/hw/s390x/cpu-topology.h
@@ -0,0 +1,45 @@
+/*
+ * CPU Topology
+ *
+ * Copyright 2022 IBM Corp.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#ifndef HW_S390X_CPU_TOPOLOGY_H
+#define HW_S390X_CPU_TOPOLOGY_H
+
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+typedef struct S390TopoContainer {
+int active_count;
+} S390TopoContainer;
+
+#define S390_TOPOLOGY_CPU_IFL 0x03
+#define S390_TOPOLOGY_MAX_ORIGIN ((63 + S390_MAX_CPUS) / 64)
+typedef struct S390TopoTLE {
+uint64_t mask[S390_TOPOLOGY_MAX_ORIGIN];
+} S390TopoTLE;
+
+struct S390Topology {
+SysBusDevice parent_obj;
+int cpus;
+S390TopoContainer *socket;
+S390TopoTLE *tle;
+MachineState *ms;
+};
+
+#define TYPE_S390_CPU_TOPOLOGY "s390-topology"
+OBJECT_DECLARE_SIMPLE_TYPE(S390Topology, S390_CPU_TOPOLOGY)
+
+S390Topology *s390_get_topology(void);
+void s390_topology_new_cpu(int core_id);
+
+static inline bool s390_has_topology(void)
+{
+return false;
+}
+
+#endif
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
new file mode 100644
index 00..42b22a1831
--- /dev/null
+++ b/hw/s390x/cpu-topology.c
@@ -0,0 +1,132 @@
+/*
+ * CPU Topology
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Pierre Morel 
+
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "hw/sysbus.h"
+#include "hw/qdev-properties.h"
+#include "hw/boards.h"
+#include "qemu/typedefs.h"
+#include "target/s390x/cpu.h"
+#include "hw/s390x/s390-virtio-ccw.h"
+#include "hw/s390x/cpu-topology.h"
+
+S390Topology *s390_get_topology(void)
+{
+static S390Topology *s390Topology;
+
+if (!s390Topology) {
+s390Topology = S390_CPU_TOPOLOGY(
+object_resolve_path(TYPE_S390_CPU_TOPOLOGY, NULL));
+}
+
+return s390Topology;
+}
+
+/*
+ * s390_topology_new_cpu:
+ * @core_id: the core ID is machine wide
+ *
+ * The topology returned by s390_get_topology(), gives us the CPU
+ * topology established by the -smp QEMU arguments.
+ * The core-id gives:
+ *  - the Container TLE (Topology List Entry) containing the CPU TLE.
+ *  - in the CPU TLE the origin, or offset of the first bit in the core mask
+ *  - the bit in the CPU TLE core mask
+ */
+void s390_topology_new_cpu(int core_id)
+{
+S390Topology *topo = s390_get_topology();
+int socket_id;
+int bit, origin;
+
+/* In the case no Topology is used nothing is to be done here */
+if (!topo) {
+return;
+}
+
+socket_id = core_id / topo->cpus;
+
+/*
+ * At the core level, each CPU is represented by a bit in a 64bit
+ * unsigned long which represent the presence of a CPU.
+ * The firmware assume that all CPU in a CPU TLE have the same
+ * type, polarization and are all dedicated or shared.
+ * In that case the origin variable represents the offset of the first
+ * CPU in the CPU container.
+ * More than 64 CPUs per socket are represented in several CPU containers
+ * inside the socket container.
+ * The only reason to have several S390TopologyCores inside a socket is
+ * to have more than 64 CPUs.
+ * In that case the origin variable represents the offset of the first CPU
+ * in the CPU container. More than 64 CPUs per socket are represented in
+ * several CPU containers inside the socket container.
+ */
+bit = core_id;
+origin = bit / 64;
+bit %= 64;
+bit = 63 - bit;
+
+topo->socket[socket_id].active_count++;
+set_bit(bit, >tle[socket_id].mask[origin]);
+}
+
+/**
+ * s390_topology_realize:
+ * @dev: the device state
+ * @errp: the error pointer (not used)
+ *
+ * During realize the machine CPU topology is initialized with the
+ * QEMU -smp parameters.
+ * The maximum count of CPU TLE in the all Topology can not be greater
+ * than the maximum

Re: [PATCH v2 01/15] migration: Take bitmap mutex when completing ram migration

2022-10-12 Thread Dr. David Alan Gilbert

* Peter Xu (pet...@redhat.com) wrote:
> Any call to ram_find_and_save_block() needs to take the bitmap mutex.  We
> used to not take it for most of ram_save_complete() because we thought
> we're the only one left using the bitmap, but it's not true after the
> preempt full patchset applied, since the return path can be taking it too.
> 
> Signed-off-by: Peter Xu 

Reviewed-by: Dr. David Alan Gilbert 

> ---
>  migration/ram.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index 1338e47665..cfeb571800 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -3406,6 +3406,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>  /* try transferring iterative blocks of memory */
>  
>  /* flush all remaining blocks regardless of rate limiting */
> +qemu_mutex_lock(>bitmap_mutex);
>  while (true) {
>  int pages;
>  
> @@ -3419,6 +3420,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>  break;
>  }
>  }
> +qemu_mutex_unlock(>bitmap_mutex);
>  
>  flush_compressed_data(rs);
>  ram_control_after_iterate(f, RAM_CONTROL_FINISH);
> -- 
> 2.37.3
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

[PULL 6/7] io/command: implement support for win32

2022-10-12 Thread marcandre . lureau

From: Marc-André Lureau 

The initial implementation was changing the pipe state created by GLib
to PIPE_NOWAIT, but it turns out it doesn't work (read/write returns an
error). Since reading may return less than the requested amount, it
seems to be non-blocking already. However, the IO operation may block
until the FD is ready. I can't find good sources of information on this,
so to be safe we can just poll for readiness beforehand.

Alternatively, we could set up the FDs ourselves, and use UNIX sockets on
Windows, which can be used in blocking/non-blocking mode. I haven't
tried it, as I am not sure it is necessary.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Daniel P. Berrangé 
Message-Id: <20221006113657.2656108-6-marcandre.lur...@redhat.com>
---
 include/io/channel-command.h |  3 ++
 io/channel-command.c | 80 ++--
 2 files changed, 62 insertions(+), 21 deletions(-)

diff --git a/include/io/channel-command.h b/include/io/channel-command.h
index 8dc58273c0..98934e6d9e 100644
--- a/include/io/channel-command.h
+++ b/include/io/channel-command.h
@@ -42,6 +42,9 @@ struct QIOChannelCommand {
 int writefd;
 int readfd;
 GPid pid;
+#ifdef WIN32
+bool blocking;
+#endif
 };
 
 
diff --git a/io/channel-command.c b/io/channel-command.c
index f84d1f03a0..74516252ba 100644
--- a/io/channel-command.c
+++ b/io/channel-command.c
@@ -26,7 +26,6 @@
 #include "qemu/sockets.h"
 #include "trace.h"
 
-#ifndef WIN32
 /**
  * qio_channel_command_new_pid:
  * @writefd: the FD connected to the command's stdin
@@ -60,7 +59,13 @@ qio_channel_command_new_pid(int writefd,
 ioc->writefd = writefd;
 ioc->pid = pid;
 
-trace_qio_channel_command_new_pid(ioc, writefd, readfd, pid);
+trace_qio_channel_command_new_pid(ioc, writefd, readfd,
+#ifdef WIN32
+  GetProcessId(pid)
+#else
+  pid
+#endif
+);
 return ioc;
 }
 
@@ -89,18 +94,6 @@ qio_channel_command_new_spawn(const char *const argv[],
 return qio_channel_command_new_pid(stdinfd, stdoutfd, pid);
 }
 
-#else /* WIN32 */
-QIOChannelCommand *
-qio_channel_command_new_spawn(const char *const argv[],
-  int flags,
-  Error **errp)
-{
-error_setg_errno(errp, ENOSYS,
- "Command spawn not supported on this platform");
-return NULL;
-}
-#endif /* WIN32 */
-
 #ifndef WIN32
 static int qio_channel_command_abort(QIOChannelCommand *ioc,
  Error **errp)
@@ -143,6 +136,23 @@ static int qio_channel_command_abort(QIOChannelCommand 
*ioc,
 
 return 0;
 }
+#else
+static int qio_channel_command_abort(QIOChannelCommand *ioc,
+ Error **errp)
+{
+DWORD ret;
+
+TerminateProcess(ioc->pid, 0);
+ret = WaitForSingleObject(ioc->pid, 1000);
+if (ret != WAIT_OBJECT_0) {
+error_setg(errp,
+   "Process %llu refused to die",
+   (unsigned long long)GetProcessId(ioc->pid));
+return -1;
+}
+
+return 0;
+}
 #endif /* ! WIN32 */
 
 
@@ -166,13 +176,27 @@ static void qio_channel_command_finalize(Object *obj)
 }
 ioc->writefd = ioc->readfd = -1;
 if (ioc->pid > 0) {
-#ifndef WIN32
 qio_channel_command_abort(ioc, NULL);
-#endif
 g_spawn_close_pid(ioc->pid);
 }
 }
 
+#ifdef WIN32
+static bool win32_fd_poll(int fd, gushort events)
+{
+GPollFD pfd = { .fd = _get_osfhandle(fd), .events = events };
+int res;
+
+do {
+res = g_poll(, 1, 0);
+} while (res < 0 && errno == EINTR);
+if (res == 0) {
+return false;
+}
+
+return true;
+}
+#endif
 
 static ssize_t qio_channel_command_readv(QIOChannel *ioc,
  const struct iovec *iov,
@@ -184,6 +208,12 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc,
 QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
 ssize_t ret;
 
+#ifdef WIN32
+if (!cioc->blocking && !win32_fd_poll(cioc->readfd, G_IO_IN)) {
+return QIO_CHANNEL_ERR_BLOCK;
+}
+#endif
+
  retry:
 ret = readv(cioc->readfd, iov, niov);
 if (ret < 0) {
@@ -213,6 +243,12 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc,
 QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
 ssize_t ret;
 
+#ifdef WIN32
+if (!cioc->blocking && !win32_fd_poll(cioc->writefd, G_IO_OUT)) {
+return QIO_CHANNEL_ERR_BLOCK;
+}
+#endif
+
  retry:
 ret = writev(cioc->writefd, iov, niov);
 if (ret <= 0) {
@@ -233,14 +269,14 @@ static int qio_channel_command_set_blocking(QIOChannel 
*ioc,
 bool enabled,
 Error **errp)
 {
+QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
+
 #ifdef WIN32
-/* command spawn is not supported on win32 */
-g_assert_not_reached();
+cioc->blocking = enabled;
 #else
-

[PULL 2/7] osdep: make readv_writev() work with partial read/write

2022-10-12 Thread marcandre . lureau

From: Marc-André Lureau 

With a pipe or other reasons, read/write may return less than the
requested bytes. This happens with the test-io-channel-command test on
Windows. glib spawn code uses a binary pipe of 4096 bytes, and the first
read returns that much (although more are requested), for some unclear
reason...

Signed-off-by: Marc-André Lureau 
Reviewed-by: Daniel P. Berrangé 
Message-Id: <20221006113657.2656108-2-marcandre.lur...@redhat.com>
---
 util/osdep.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/util/osdep.c b/util/osdep.c
index 60fcbbaebe..746d5f7d71 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -538,18 +538,22 @@ int socket_init(void)
 
 
 #ifndef CONFIG_IOVEC
-/* helper function for iov_send_recv() */
 static ssize_t
 readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
 {
 unsigned i = 0;
 ssize_t ret = 0;
+ssize_t off = 0;
 while (i < iov_cnt) {
 ssize_t r = do_write
-? write(fd, iov[i].iov_base, iov[i].iov_len)
-: read(fd, iov[i].iov_base, iov[i].iov_len);
+? write(fd, iov[i].iov_base + off, iov[i].iov_len - off)
+: read(fd, iov[i].iov_base + off, iov[i].iov_len - off);
 if (r > 0) {
 ret += r;
+off += r;
+if (off < iov[i].iov_len) {
+continue;
+}
 } else if (!r) {
 break;
 } else if (errno == EINTR) {
@@ -562,6 +566,7 @@ readv_writev(int fd, const struct iovec *iov, int iov_cnt, 
bool do_write)
 }
 break;
 }
+off = 0;
 i++;
 }
 return ret;
-- 
2.37.3

Re: [PATCH v2 1/2] Refactoring: refactor TFR() macro to RETRY_ON_EINTR()

2022-10-12 Thread Christian Schoenebeck

On Mittwoch, 12. Oktober 2022 14:28:23 CEST Nikita Ivanov wrote:
> Rename macro name to more transparent one and refactor
> it to expression.
> 
> Signed-off-by: Nikita Ivanov 
> ---
>  chardev/char-fd.c| 2 +-
>  chardev/char-pipe.c  | 8 +---
>  include/qemu/osdep.h | 8 +++-
>  net/tap-bsd.c| 6 +++---
>  net/tap-linux.c  | 2 +-
>  net/tap-solaris.c| 8 
>  os-posix.c   | 2 +-
>  7 files changed, 22 insertions(+), 14 deletions(-)
> 
> diff --git a/chardev/char-fd.c b/chardev/char-fd.c
> index cf78454841..d2c4923359 100644
> --- a/chardev/char-fd.c
> +++ b/chardev/char-fd.c
> @@ -198,7 +198,7 @@ int qmp_chardev_open_file_source(char *src, int flags,
> Error **errp)
>  {
>  int fd = -1;
> 
> -TFR(fd = qemu_open_old(src, flags, 0666));
> +fd = RETRY_ON_EINTR(qemu_open_old(src, flags, 0666));
>  if (fd == -1) {
>  error_setg_file_open(errp, errno, src);
>  }
> diff --git a/chardev/char-pipe.c b/chardev/char-pipe.c
> index 66d3b85091..5ad30bcc59 100644
> --- a/chardev/char-pipe.c
> +++ b/chardev/char-pipe.c
> @@ -131,8 +131,8 @@ static void qemu_chr_open_pipe(Chardev *chr,
> 
>  filename_in = g_strdup_printf("%s.in", filename);
>  filename_out = g_strdup_printf("%s.out", filename);
> -TFR(fd_in = qemu_open_old(filename_in, O_RDWR | O_BINARY));
> -TFR(fd_out = qemu_open_old(filename_out, O_RDWR | O_BINARY));
> +fd_in = RETRY_ON_EINTR(qemu_open_old(filename_in, O_RDWR | O_BINARY));
> +fd_out = RETRY_ON_EINTR(qemu_open_old(filename_out, O_RDWR |
> O_BINARY));
>  g_free(filename_in);
>  g_free(filename_out);
>  if (fd_in < 0 || fd_out < 0) {
> @@ -142,7 +142,9 @@ static void qemu_chr_open_pipe(Chardev *chr,
>  if (fd_out >= 0) {
>  close(fd_out);
>  }
> -TFR(fd_in = fd_out = qemu_open_old(filename, O_RDWR | O_BINARY));
> +fd_in = fd_out = RETRY_ON_EINTR(
> +qemu_open_old(filename, O_RDWR | O_BINARY)
> +);
>  if (fd_in < 0) {
>  error_setg_file_open(errp, errno, filename);
>  return;
> diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> index b1c161c035..a470905475 100644
> --- a/include/qemu/osdep.h
> +++ b/include/qemu/osdep.h
> @@ -243,7 +243,13 @@ void QEMU_ERROR("code path is reachable")
>  #define ESHUTDOWN 4099
>  #endif
> 
> -#define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR)
> +#define RETRY_ON_EINTR(expr) \
> +(__extension__  \
> +({ typeof(expr) __result;   \
> +   do { \
> +__result = (typeof(expr)) (expr); \

Not a big deal, but as Peter already pointed out in previous version: you 
could drop the type cast in this particular form here.

glibc's TEMP_FAILURE_RETRY() version needs the cast as it uses `long int` as 
hard coded type for the result variable, whereas this version here uses a 
generic approach by declaring the result variable already exactly with the 
type the passed expression evaluates to, so the cast is redundant in this 
version here.

> +   } while (__result == -1L && errno == EINTR); \
> +   __result; }))
> 
>  /* time_t may be either 32 or 64 bits depending on the host OS, and
>   * can be either signed or unsigned, so we can't just hardcode a
> diff --git a/net/tap-bsd.c b/net/tap-bsd.c
> index 005ce05c6e..4c98fdd337 100644
> --- a/net/tap-bsd.c
> +++ b/net/tap-bsd.c
> @@ -56,7 +56,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
> } else {
>  snprintf(dname, sizeof dname, "/dev/tap%d", i);
>  }
> -TFR(fd = open(dname, O_RDWR));
> +fd = RETRY_ON_EINTR(open(dname, O_RDWR));
>  if (fd >= 0) {
>  break;
>  }
> @@ -111,7 +111,7 @@ static int tap_open_clone(char *ifname, int
> ifname_size, Error **errp)
>  int fd, s, ret;
>  struct ifreq ifr;
> 
> -TFR(fd = open(PATH_NET_TAP, O_RDWR));
> +fd = RETRY_ON_EINTR(open(PATH_NET_TAP, O_RDWR));
>  if (fd < 0) {
>  error_setg_errno(errp, errno, "could not open %s", PATH_NET_TAP);
>  return -1;
> @@ -159,7 +159,7 @@ int tap_open(char *ifname, int ifname_size, int
> *vnet_hdr,
>  if (ifname[0] != '\0') {
>  char dname[100];
>  snprintf(dname, sizeof dname, "/dev/%s", ifname);
> -TFR(fd = open(dname, O_RDWR));
> +fd = RETRY_ON_EINTR(open(dname, O_RDWR));
>  if (fd < 0 && errno != ENOENT) {
>  error_setg_errno(errp, errno, "could not open %s", dname);
>  return -1;
> diff --git a/net/tap-linux.c b/net/tap-linux.c
> index 304ff45071..f54f308d35 100644
> --- a/net/tap-linux.c
> +++ b/net/tap-linux.c
> @@ -45,7 +45,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
> int len = sizeof(struct virtio_net_hdr);
>  unsigned int features;
> 
> -TFR(fd =

[PULL 3/7] util: make do_send_recv work with partial send/recv

2022-10-12 Thread marcandre . lureau

From: Marc-André Lureau 

According to the MSDN documentation and the Linux man pages, send() should
try to send as much as possible in blocking mode, while recv() may return
earlier with a smaller available amount. In either case, we should try to
continue send/recv from there.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Daniel P. Berrangé 
Message-Id: <20221006113657.2656108-3-marcandre.lur...@redhat.com>
---
 util/iov.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/util/iov.c b/util/iov.c
index 22d6996cce..b4be580022 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -111,12 +111,17 @@ do_send_recv(int sockfd, struct iovec *iov, unsigned 
iov_cnt, bool do_send)
 /*XXX Note: windows has WSASend() and WSARecv() */
 unsigned i = 0;
 ssize_t ret = 0;
+ssize_t off = 0;
 while (i < iov_cnt) {
 ssize_t r = do_send
-? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0)
-: recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0);
+? send(sockfd, iov[i].iov_base + off, iov[i].iov_len - off, 0)
+: recv(sockfd, iov[i].iov_base + off, iov[i].iov_len - off, 0);
 if (r > 0) {
 ret += r;
+off += r;
+if (off < iov[i].iov_len) {
+continue;
+}
 } else if (!r) {
 break;
 } else if (errno == EINTR) {
@@ -129,6 +134,7 @@ do_send_recv(int sockfd, struct iovec *iov, unsigned 
iov_cnt, bool do_send)
 }
 break;
 }
+off = 0;
 i++;
 }
 return ret;
-- 
2.37.3

Re: [PATCH v7 4/5] hw/mem/cxl-type3: Add CXL CDAT Data Object Exchange

2022-10-12 Thread Gregory Price

This code causes heap corruption on free, and I think it should be
refactored to pre-allocate all the entries we're interested in putting
into the table.  This would flatten the code and simplify the error
handling steps.

Also, should we consider making a union with all the possible entries to
make entry allocation easier?  It may eat a few extra bytes of memory,
but it would simplify the allocation/cleanup code here further.

Given that every allocation has to be checked, I'm also not convinced
the use of g_autofree is worth the potential footguns associated with
it.

> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 568c9d62f5..3fa5d70662 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -12,9 +12,218 @@
> +static int ct3_build_cdat_table(CDATSubHeader ***cdat_table,
> +void *priv)
> +{
(snip)
> +/* For now, no memory side cache, plausiblish numbers */
> +dslbis_nonvolatile = g_malloc(sizeof(*dslbis_nonvolatile) * 
> dslbis_nonvolatile_num);
> +if (!dslbis_nonvolatile)
> +return -ENOMEM;

this allocation creates a table of entries, which is later freed
incorrectly

> +
> +*cdat_table = g_malloc0(len * sizeof(*cdat_table));

this allocation needs to be checked

> +/* Header always at start of structure */
> +if (dsmas_nonvolatile) {
> +(*cdat_table)[i++] = g_steal_pointer(_nonvolatile);
> +}
> +if (dslbis_nonvolatile) {
> +CDATDslbis *dslbis = g_steal_pointer(_nonvolatile);

needing a local reference to avoid a g_autofree footgun suggests
we should not use g_autofree here, and should possibly reconsider the
overall strategy for allocation and cleanup

> +int j;
> +
> +for (j = 0; j < dslbis_nonvolatile_num; j++) {
> +(*cdat_table)[i++] = (CDATSubHeader *)[j];
> +}

this fills the CDAT table with sub-references to the table allocated
above, which leads to heap corruption with the current code, or
complicated cleanup if we decide to keep it

> +
> +return len;
> +}
> +
> +static void ct3_free_cdat_table(CDATSubHeader **cdat_table, int num, void 
> *priv)
> +{
> +int i;
> +

And here we free every entry of the table, which can/will cause heap
corruption when the sub-table entries are freed

> +for (i = 0; i < num; i++) {
> +g_free(cdat_table[i]);
> +}
> +g_free(cdat_table);
> +}

[PULL 0/7] Win32 patches

2022-10-12 Thread marcandre . lureau

From: Marc-André Lureau 

The following changes since commit ab44ea1059242ff2dbbde44e94468f6c6e5f87be:

  Merge tag 'pull-testing-gdbstub-plugins-gitdm-111022-1' of 
https://github.com/stsquad/qemu into staging (2022-10-11 15:31:27 -0400)

are available in the Git repository at:

  https://gitlab.com/marcandre.lureau/qemu.git tags/win32-pull-request

for you to fetch changes up to 76f5148c21b4543e62a6ad605ac4b44133421401:

  tests/unit: make test-io-channel-command work on win32 (2022-10-12 19:22:01 
+0400)


win32-related misc patches



Marc-André Lureau (7):
  win32: set threads name
  osdep: make readv_writev() work with partial read/write
  util: make do_send_recv work with partial send/recv
  tests/channel-helper: set blocking in main thread
  io/command: use glib GSpawn, instead of open-coding fork/exec
  io/command: implement support for win32
  tests/unit: make test-io-channel-command work on win32

 include/io/channel-command.h |   5 +-
 io/channel-command.c | 185 +++
 tests/unit/io-channel-helpers.c  |   9 +-
 tests/unit/test-io-channel-command.c |  37 +++---
 util/iov.c   |  10 +-
 util/osdep.c |  11 +-
 util/qemu-thread-win32.c |  54 +++-
 7 files changed, 171 insertions(+), 140 deletions(-)

-- 
2.37.3

[PULL 4/7] tests/channel-helper: set blocking in main thread

2022-10-12 Thread marcandre . lureau

From: Marc-André Lureau 

The /io/channel/command/echo tests run the reader side and the writer
side with the same underlying command channel. Setting the blocking mode
of the fd/handles while the other end is already reading/writing may
create issues (deadlock in win32 when earlier attempts of this series
were using SetNamedPipeHandleState). Let's just do it before spawning
the threads to avoid further concurrency issues.

Signed-off-by: Marc-André Lureau 
Reviewed-by: Daniel P. Berrangé 
Message-Id: <20221006113657.2656108-4-marcandre.lur...@redhat.com>
---
 tests/unit/io-channel-helpers.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/tests/unit/io-channel-helpers.c b/tests/unit/io-channel-helpers.c
index ff156ed3c4..c0799c21c2 100644
--- a/tests/unit/io-channel-helpers.c
+++ b/tests/unit/io-channel-helpers.c
@@ -25,7 +25,6 @@
 struct QIOChannelTest {
 QIOChannel *src;
 QIOChannel *dst;
-bool blocking;
 size_t len;
 size_t niov;
 char *input;
@@ -42,8 +41,6 @@ static gpointer test_io_thread_writer(gpointer opaque)
 {
 QIOChannelTest *data = opaque;
 
-qio_channel_set_blocking(data->src, data->blocking, NULL);
-
 qio_channel_writev_all(data->src,
data->inputv,
data->niov,
@@ -58,8 +55,6 @@ static gpointer test_io_thread_reader(gpointer opaque)
 {
 QIOChannelTest *data = opaque;
 
-qio_channel_set_blocking(data->dst, data->blocking, NULL);
-
 qio_channel_readv_all(data->dst,
   data->outputv,
   data->niov,
@@ -113,7 +108,9 @@ void qio_channel_test_run_threads(QIOChannelTest *test,
 
 test->src = src;
 test->dst = dst;
-test->blocking = blocking;
+
+qio_channel_set_blocking(test->dst, blocking, NULL);
+qio_channel_set_blocking(test->src, blocking, NULL);
 
 reader = g_thread_new("reader",
   test_io_thread_reader,
-- 
2.37.3

[PULL 7/7] tests/unit: make test-io-channel-command work on win32

2022-10-12 Thread marcandre . lureau

From: Marc-André Lureau 

This has been tested under msys2 & windows 11. I haven't tried to make
it work with other environments yet, but that should be enough to
validate the channel-command implementation anyway.

Here are the changes:
- drop tests/ from fifo/pipe path, to avoid directory issues
- use g_find_program() to look up the socat executable (otherwise we
would need to change ChannelCommand to use G_SPAWN_SEARCH_PATH, and deal
with missing socat differently)
- skip the "echo" test when socat is missing as well

Signed-off-by: Marc-André Lureau 
Reviewed-by: Daniel P. Berrangé 
Message-Id: <20221006113657.2656108-7-marcandre.lur...@redhat.com>
---
 tests/unit/test-io-channel-command.c | 37 ++--
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/tests/unit/test-io-channel-command.c 
b/tests/unit/test-io-channel-command.c
index aa09c559cd..7eee939c07 100644
--- a/tests/unit/test-io-channel-command.c
+++ b/tests/unit/test-io-channel-command.c
@@ -24,29 +24,30 @@
 #include "qapi/error.h"
 #include "qemu/module.h"
 
-#ifndef WIN32
+#define TEST_FIFO "test-io-channel-command.fifo"
+
+#define SOCAT_SRC "PIPE:" TEST_FIFO ",wronly"
+#define SOCAT_DST "PIPE:" TEST_FIFO ",rdonly"
+
+static char *socat = NULL;
+
 static void test_io_channel_command_fifo(bool async)
 {
-#define TEST_FIFO "tests/test-io-channel-command.fifo"
 QIOChannel *src, *dst;
 QIOChannelTest *test;
-const char *srcfifo = "PIPE:" TEST_FIFO ",wronly";
-const char *dstfifo = "PIPE:" TEST_FIFO ",rdonly";
 const char *srcargv[] = {
-"/bin/socat", "-", srcfifo, NULL,
+socat, "-", SOCAT_SRC, NULL,
 };
 const char *dstargv[] = {
-"/bin/socat", dstfifo, "-", NULL,
+socat, SOCAT_DST, "-", NULL,
 };
 
-unlink(TEST_FIFO);
-if (access("/bin/socat", X_OK) < 0) {
-g_test_skip("socat is missing");
+if (!socat) {
+g_test_skip("socat is not found in PATH");
 return;
 }
-if (mkfifo(TEST_FIFO, 0600) < 0) {
-abort();
-}
+
+unlink(TEST_FIFO);
 src = QIO_CHANNEL(qio_channel_command_new_spawn(srcargv,
 O_WRONLY,
 _abort));
@@ -81,11 +82,12 @@ static void test_io_channel_command_echo(bool async)
 QIOChannel *ioc;
 QIOChannelTest *test;
 const char *socatargv[] = {
-"/bin/socat", "-", "-", NULL,
+socat, "-", "-", NULL,
 };
 
-if (access("/bin/socat", X_OK) < 0) {
-return; /* Pretend success if socat is not present */
+if (!socat) {
+g_test_skip("socat is not found in PATH");
+return;
 }
 
 ioc = QIO_CHANNEL(qio_channel_command_new_spawn(socatargv,
@@ -108,7 +110,6 @@ static void test_io_channel_command_echo_sync(void)
 {
 test_io_channel_command_echo(false);
 }
-#endif
 
 int main(int argc, char **argv)
 {
@@ -116,7 +117,8 @@ int main(int argc, char **argv)
 
 g_test_init(, , NULL);
 
-#ifndef WIN32
+socat = g_find_program_in_path("socat");
+
 g_test_add_func("/io/channel/command/fifo/sync",
 test_io_channel_command_fifo_sync);
 g_test_add_func("/io/channel/command/fifo/async",
@@ -125,7 +127,6 @@ int main(int argc, char **argv)
 test_io_channel_command_echo_sync);
 g_test_add_func("/io/channel/command/echo/async",
 test_io_channel_command_echo_async);
-#endif
 
 return g_test_run();
 }
-- 
2.37.3

[PULL 5/7] io/command: use glib GSpawn, instead of open-coding fork/exec

2022-10-12 Thread marcandre . lureau

From: Marc-André Lureau 

Simplify qio_channel_command_new_spawn() with GSpawn API. This will
allow building for WIN32 in the following patches.

As pointed out by Daniel Berrangé: there is a change in semantics here
too. The current code only touches stdin/stdout/stderr. Any other FDs
which do NOT have O_CLOEXEC set will be inherited. With the new code,
all FDs except stdin/out/err will be explicitly closed, because we don't
set the flag G_SPAWN_LEAVE_DESCRIPTORS_OPEN. The only place we use
QIOChannelCommand today is the migration exec: protocol, and that is
only declared to use stdin/stdout.

Reviewed-by: Daniel P. Berrangé 
Signed-off-by: Marc-André Lureau 
Message-Id: <20221006113657.2656108-5-marcandre.lur...@redhat.com>
---
 include/io/channel-command.h |   2 +-
 io/channel-command.c | 105 ++-
 2 files changed, 19 insertions(+), 88 deletions(-)

diff --git a/include/io/channel-command.h b/include/io/channel-command.h
index 305ac1d280..8dc58273c0 100644
--- a/include/io/channel-command.h
+++ b/include/io/channel-command.h
@@ -41,7 +41,7 @@ struct QIOChannelCommand {
 QIOChannel parent;
 int writefd;
 int readfd;
-pid_t pid;
+GPid pid;
 };
 
 
diff --git a/io/channel-command.c b/io/channel-command.c
index 9f2f4a1793..f84d1f03a0 100644
--- a/io/channel-command.c
+++ b/io/channel-command.c
@@ -31,7 +31,7 @@
  * qio_channel_command_new_pid:
  * @writefd: the FD connected to the command's stdin
  * @readfd: the FD connected to the command's stdout
- * @pid: the PID of the running child command
+ * @pid: the PID/HANDLE of the running child command
  * @errp: pointer to a NULL-initialized error object
  *
  * Create a channel for performing I/O with the
@@ -50,7 +50,7 @@
 static QIOChannelCommand *
 qio_channel_command_new_pid(int writefd,
 int readfd,
-pid_t pid)
+GPid pid)
 {
 QIOChannelCommand *ioc;
 
@@ -69,94 +69,24 @@ qio_channel_command_new_spawn(const char *const argv[],
   int flags,
   Error **errp)
 {
-pid_t pid = -1;
-int stdinfd[2] = { -1, -1 };
-int stdoutfd[2] = { -1, -1 };
-int devnull = -1;
-bool stdinnull = false, stdoutnull = false;
-QIOChannelCommand *ioc;
+g_autoptr(GError) err = NULL;
+GPid pid = 0;
+GSpawnFlags gflags = G_SPAWN_CLOEXEC_PIPES | G_SPAWN_DO_NOT_REAP_CHILD;
+int stdinfd = -1, stdoutfd = -1;
 
 flags = flags & O_ACCMODE;
-
-if (flags == O_RDONLY) {
-stdinnull = true;
-}
-if (flags == O_WRONLY) {
-stdoutnull = true;
-}
-
-if (stdinnull || stdoutnull) {
-devnull = open("/dev/null", O_RDWR);
-if (devnull < 0) {
-error_setg_errno(errp, errno,
- "Unable to open /dev/null");
-goto error;
-}
-}
-
-if ((!stdinnull && !g_unix_open_pipe(stdinfd, FD_CLOEXEC, NULL)) ||
-(!stdoutnull && !g_unix_open_pipe(stdoutfd, FD_CLOEXEC, NULL))) {
-error_setg_errno(errp, errno,
- "Unable to open pipe");
-goto error;
-}
-
-pid = qemu_fork(errp);
-if (pid < 0) {
-goto error;
-}
-
-if (pid == 0) { /* child */
-dup2(stdinnull ? devnull : stdinfd[0], STDIN_FILENO);
-dup2(stdoutnull ? devnull : stdoutfd[1], STDOUT_FILENO);
-/* Leave stderr connected to qemu's stderr */
-
-if (!stdinnull) {
-close(stdinfd[0]);
-close(stdinfd[1]);
-}
-if (!stdoutnull) {
-close(stdoutfd[0]);
-close(stdoutfd[1]);
-}
-if (devnull != -1) {
-close(devnull);
-}
-
-execv(argv[0], (char * const *)argv);
-_exit(1);
+gflags |= flags == O_WRONLY ? G_SPAWN_STDOUT_TO_DEV_NULL : 0;
+
+if (!g_spawn_async_with_pipes(NULL, (char **)argv, NULL, gflags, NULL, 
NULL,
+  ,
+  flags == O_RDONLY ? NULL : ,
+  flags == O_WRONLY ? NULL : ,
+  NULL, )) {
+error_setg(errp, "%s", err->message);
+return NULL;
 }
 
-if (!stdinnull) {
-close(stdinfd[0]);
-}
-if (!stdoutnull) {
-close(stdoutfd[1]);
-}
-
-ioc = qio_channel_command_new_pid(stdinnull ? devnull : stdinfd[1],
-  stdoutnull ? devnull : stdoutfd[0],
-  pid);
-trace_qio_channel_command_new_spawn(ioc, argv[0], flags);
-return ioc;
-
- error:
-if (devnull != -1) {
-close(devnull);
-}
-if (stdinfd[0] != -1) {
-close(stdinfd[0]);
-}
-if (stdinfd[1] != -1) {
-close(stdinfd[1]);
-}
-if (stdoutfd[0] != -1) {
-close(stdoutfd[0]);
-}
-if (stdoutfd[1] != -1) {
-close(stdoutfd[1]);
-

[PULL 1/7] win32: set threads name

2022-10-12 Thread marcandre . lureau

From: Marc-André Lureau 

As described in:
https://learn.microsoft.com/en-us/visualstudio/debugger/how-to-set-a-thread-name-in-native-code?view=vs-2022

SetThreadDescription() is available since Windows 10, version 1607 and
in some versions only by "Run Time Dynamic Linking". Its declaration is
not yet in mingw, so we look up the function the same way glib does.

Tested with Visual Studio Community 2022 debugger.

Signed-off-by: Marc-André Lureau 
Acked-by: Richard Henderson 
---
 util/qemu-thread-win32.c | 54 ++--
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index a2d5a6e825..b9a467d7db 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -19,12 +19,39 @@
 
 static bool name_threads;
 
+typedef HRESULT (WINAPI *pSetThreadDescription) (HANDLE hThread,
+ PCWSTR lpThreadDescription);
+static pSetThreadDescription SetThreadDescriptionFunc;
+static HMODULE kernel32_module;
+
+static bool load_set_thread_description(void)
+{
+static gsize _init_once = 0;
+
+if (g_once_init_enter(&_init_once)) {
+kernel32_module = LoadLibrary("kernel32.dll");
+if (kernel32_module) {
+SetThreadDescriptionFunc =
+(pSetThreadDescription)GetProcAddress(kernel32_module,
+  "SetThreadDescription");
+if (!SetThreadDescriptionFunc) {
+FreeLibrary(kernel32_module);
+}
+}
+g_once_init_leave(&_init_once, 1);
+}
+
+return !!SetThreadDescriptionFunc;
+}
+
 void qemu_thread_naming(bool enable)
 {
-/* But note we don't actually name them on Windows yet */
 name_threads = enable;
 
-fprintf(stderr, "qemu: thread naming not supported on this host\n");
+if (enable && !load_set_thread_description()) {
+fprintf(stderr, "qemu: thread naming not supported on this host\n");
+name_threads = false;
+}
 }
 
 static void error_exit(int err, const char *msg)
@@ -400,6 +427,25 @@ void *qemu_thread_join(QemuThread *thread)
 return ret;
 }
 
+static bool set_thread_description(HANDLE h, const char *name)
+{
+HRESULT hr;
+g_autofree wchar_t *namew = NULL;
+
+if (!load_set_thread_description()) {
+return false;
+}
+
+namew = g_utf8_to_utf16(name, -1, NULL, NULL, NULL);
+if (!namew) {
+return false;
+}
+
+hr = SetThreadDescriptionFunc(h, namew);
+
+return SUCCEEDED(hr);
+}
+
 void qemu_thread_create(QemuThread *thread, const char *name,
void *(*start_routine)(void *),
void *arg, int mode)
@@ -423,7 +469,11 @@ void qemu_thread_create(QemuThread *thread, const char 
*name,
 if (!hThread) {
 error_exit(GetLastError(), __func__);
 }
+if (name_threads && name && !set_thread_description(hThread, name)) {
+fprintf(stderr, "qemu: failed to set thread description: %s\n", name);
+}
 CloseHandle(hThread);
+
 thread->data = data;
 }
 
-- 
2.37.3

Re: [RFC PATCH 0/4] docs/devel suggestions for discussion

2022-10-12 Thread Paolo Bonzini


On 10/12/22 14:11, Alex Bennée wrote:

Hi,

This is an attempt to improve our processes documentation by:

  - adding an explicit section on maintainers
  - reducing the up-front verbiage in patch submission
  - emphasising the importance of respectful reviews

I'm sure the language could be improved further so I humbly submit
this RFC for discussion.

Alex Bennée (4):
   docs/devel: add a maintainers section to development process
   docs/devel: make language a little less code centric
   docs/devel: simplify the minimal checklist
   docs/devel: try and improve the language around patch review

  docs/devel/code-of-conduct.rst   |   2 +
  docs/devel/index-process.rst |   1 +
  docs/devel/maintainers.rst   |  84 +++
  docs/devel/submitting-a-patch.rst| 101 +++
  docs/devel/submitting-a-pull-request.rst |  12 +--
  roms/qboot   |   2 +-
  6 files changed, 157 insertions(+), 45 deletions(-)
  create mode 100644 docs/devel/maintainers.rst


Thanks, these are useful improvements.  On top we could probably merge 
some content from Linux and make the documentation standalone.  But still:


Reviewed-by: Paolo Bonzini 

after addressing comments from Stefan and myself.

Paolo

Re: [PATCH 0/3] iothread and irqfd support

2022-10-12 Thread Stefan Hajnoczi

virtio-blk's dataplane BH completion batching mechanism is not enabled
by default and the performance results are mixed. If you develop a
different mechanism from scratch I think there's a good chance it
would work better :).

This looks like a queuing theory problem to me. It should be possible
to model IOPS as a function of some parameters and then hopefully find
simple rules to optimize IOPS by adjusting some of the parameters at
runtime. I haven't looked into this much myself though, so I don't
have any concrete suggestion. The basic idea is that as long as events
occur at a minimum rate then they can be batched to maximize
throughput without sacrificing too much latency. If the rate drops
then the device cannot hold back events.

Another place to look for inspiration is network cards. In Linux it's
common to use the NAPI framework to disable further receive interrupts
and then poll until the receive queue becomes empty. Transmit
completions can also be mitigated, but I'm not sure what the most
common approach is there.

Stefan

Re: [RFC PATCH 2/4] docs/devel: make language a little less code centric

2022-10-12 Thread Paolo Bonzini


On 10/12/22 14:11, Alex Bennée wrote:

+QEMU welcomes contributions to fix bugs, add functionality or improve
+the documentation. However, we get a lot of patches, and so we have
+some guidelines about submitting patches. If you follow these, you'll


While we're at it, "about submitting them".

Paolo


+help make our task of code review easier and your patch is likely to
+be committed faster.

Re: [PATCH v2 2/2] error handling: Use RETRY_ON_EINTR() macro where applicable

2022-10-12 Thread Christian Schoenebeck

On Mittwoch, 12. Oktober 2022 17:17:46 CEST Bin Meng wrote:
> Hi,
> 
> On Wed, Oct 12, 2022 at 8:32 PM Nikita Ivanov  wrote:
> > There is a defined RETRY_ON_EINTR() macro in qemu/osdep.h which
> > handles the same while loop.
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/415
> > 
> > Signed-off-by: Nikita Ivanov 
> > ---
> > 
> >  block/file-posix.c| 37 -
> >  chardev/char-pty.c|  4 +---
> >  hw/9pfs/9p-local.c|  8 ++--
> >  net/l2tpv3.c  | 17 +
> >  net/socket.c  | 16 +++-
> >  net/tap.c | 12 
> >  qga/commands-posix.c  |  4 +---
> >  semihosting/syscalls.c|  4 +---
> >  tests/qtest/libqtest.c| 14 ++
> >  tests/vhost-user-bridge.c |  4 +---
> >  util/main-loop.c  |  4 +---
> >  util/osdep.c  |  4 +---
> >  util/vfio-helpers.c   | 12 ++--
> >  13 files changed, 52 insertions(+), 88 deletions(-)
> 
> This patch has to be squashed into patch 1 for bisectability, as TFR
> is already removed in patch 1.

They are intentionally separated: the 1st patch replaces occurrences of TFR,
whereas the 2nd patch introduces use of the macro at locations where it was
not used yet.

Nikita, could you please move those 2 hunks that still had TFR() occurrences to
patch 1?

And please use git's --thread option next time, so that individual patch 
emails are linked to cover letter email (which adds appropriate `References:`
and `In-Reply-To:` email headers).

Best regards,
Christian Schoenebeck

[PATCH 1/4] qom: Improve error messages when property has no getter or setter

2022-10-12 Thread Markus Armbruster

When you try to set a property that has no setter, the error message
blames "insufficient permission":

$ qemu-system-x86_64 -S -display none -nodefaults -monitor stdio
QEMU 7.1.50 monitor - type 'help' for more information
(qemu) qom-set /machine type q35
Error: Insufficient permission to perform this operation

This implies it could work with "sufficient permission".  It can't.
Change the error message to:

Error: Property 'pc-i440fx-7.2-machine.type' is not writable

Do the same for getting a property that has no getter.

Signed-off-by: Markus Armbruster 
---
 qom/object.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/qom/object.c b/qom/object.c
index d34608558e..e5cef30f6d 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -1383,7 +1383,8 @@ bool object_property_get(Object *obj, const char *name, 
Visitor *v,
 }
 
 if (!prop->get) {
-error_setg(errp, QERR_PERMISSION_DENIED);
+error_setg(errp, "Property '%s.%s' is not readable",
+   object_get_typename(obj), name);
 return false;
 }
 prop->get(obj, v, name, prop->opaque, );
@@ -1402,7 +1403,8 @@ bool object_property_set(Object *obj, const char *name, 
Visitor *v,
 }
 
 if (!prop->set) {
-error_setg(errp, QERR_PERMISSION_DENIED);
+error_setg(errp, "Property '%s.%s' is not writable",
+   object_get_typename(obj), name);
 return false;
 }
 prop->set(obj, v, name, prop->opaque, errp);
-- 
2.37.2

[PATCH 2/4] backends: Improve error messages when property can no longer be set

2022-10-12 Thread Markus Armbruster

When you try to set virtio-rng property "filename" after the backend
has been completed with user_creatable_complete(), the error message
blames "insufficient permission":

$ qemu-system-x86_64 -S -display none -nodefaults -monitor stdio -object 
rng-random,id=rng0 -device virtio-rng,id=vrng0,rng=rng0
QEMU 7.1.50 monitor - type 'help' for more information
(qemu) qom-set /objects/rng0 filename /dev/random
Error: Insufficient permission to perform this operation

This implies it could work with "sufficient permission".  It can't.
Change the error message to:

Error: Property 'filename' can no longer be set

Same for cryptodev-vhost-user property "chardev", rng-egd property
"chardev", and vhost-user-backend property "chardev".

Signed-off-by: Markus Armbruster 

# This is the commit message #2:

# fixup! backends: Improve error messages when property can no longer be set
---
 backends/cryptodev-vhost-user.c | 2 +-
 backends/rng-egd.c  | 2 +-
 backends/rng-random.c   | 2 +-
 backends/vhost-user.c   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/backends/cryptodev-vhost-user.c b/backends/cryptodev-vhost-user.c
index 5443a59153..f9c5867e38 100644
--- a/backends/cryptodev-vhost-user.c
+++ b/backends/cryptodev-vhost-user.c
@@ -339,7 +339,7 @@ static void cryptodev_vhost_user_set_chardev(Object *obj,
   CRYPTODEV_BACKEND_VHOST_USER(obj);
 
 if (s->opened) {
-error_setg(errp, QERR_PERMISSION_DENIED);
+error_setg(errp, "Property 'chardev' can no longer be set");
 } else {
 g_free(s->chr_name);
 s->chr_name = g_strdup(value);
diff --git a/backends/rng-egd.c b/backends/rng-egd.c
index 4de142b9dc..684c3cf3d6 100644
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -116,7 +116,7 @@ static void rng_egd_set_chardev(Object *obj, const char 
*value, Error **errp)
 RngEgd *s = RNG_EGD(b);
 
 if (b->opened) {
-error_setg(errp, QERR_PERMISSION_DENIED);
+error_setg(errp, "Property 'chardev' can no longer be set");
 } else {
 g_free(s->chr_name);
 s->chr_name = g_strdup(value);
diff --git a/backends/rng-random.c b/backends/rng-random.c
index 7add272edd..80eb5be138 100644
--- a/backends/rng-random.c
+++ b/backends/rng-random.c
@@ -96,7 +96,7 @@ static void rng_random_set_filename(Object *obj, const char 
*filename,
 RngRandom *s = RNG_RANDOM(obj);
 
 if (b->opened) {
-error_setg(errp, QERR_PERMISSION_DENIED);
+error_setg(errp, "Property 'filename' can no longer be set");
 return;
 }
 
diff --git a/backends/vhost-user.c b/backends/vhost-user.c
index 10b39992d2..5dedb2d987 100644
--- a/backends/vhost-user.c
+++ b/backends/vhost-user.c
@@ -141,7 +141,7 @@ static void set_chardev(Object *obj, const char *value, 
Error **errp)
 Chardev *chr;
 
 if (b->completed) {
-error_setg(errp, QERR_PERMISSION_DENIED);
+error_setg(errp, "Property 'chardev' can no longer be set");
 return;
 }
 
-- 
2.37.2

[PATCH 0/4] Replace QERR_PERMISSION_DENIED by better error messages

2022-10-12 Thread Markus Armbruster

Markus Armbruster (4):
  qom: Improve error messages when property has no getter or setter
  backends: Improve error messages when property can no longer be set
  qtest: Improve error messages when property can not be set right now
  qerror: QERR_PERMISSION_DENIED is no longer used, drop

 include/qapi/qmp/qerror.h   | 3 ---
 backends/cryptodev-vhost-user.c | 2 +-
 backends/rng-egd.c  | 2 +-
 backends/rng-random.c   | 2 +-
 backends/vhost-user.c   | 2 +-
 qom/object.c| 6 --
 softmmu/qtest.c | 4 ++--
 7 files changed, 10 insertions(+), 11 deletions(-)

-- 
2.37.2

[PATCH 4/4] qerror: QERR_PERMISSION_DENIED is no longer used, drop

2022-10-12 Thread Markus Armbruster

Signed-off-by: Markus Armbruster 
---
 include/qapi/qmp/qerror.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/include/qapi/qmp/qerror.h b/include/qapi/qmp/qerror.h
index 596fce0c54..87ca83b155 100644
--- a/include/qapi/qmp/qerror.h
+++ b/include/qapi/qmp/qerror.h
@@ -50,9 +50,6 @@
 #define QERR_MISSING_PARAMETER \
 "Parameter '%s' is missing"
 
-#define QERR_PERMISSION_DENIED \
-"Insufficient permission to perform this operation"
-
 #define QERR_PROPERTY_VALUE_BAD \
 "Property '%s.%s' doesn't take value '%s'"
 
-- 
2.37.2

[PATCH 3/4] qtest: Improve error messages when property can not be set right now

2022-10-12 Thread Markus Armbruster

When you try to set qtest property "log" while the qtest object is
active, the error message blames "insufficient permission":

$ qemu-system-x86_64 -S -display none -nodefaults -monitor stdio -chardev 
socket,id=chrqt0,path=qtest.socket,server=on,wait=off -object 
qtest,id=qt0,chardev=chrqt0,log=/dev/null
QEMU 7.1.50 monitor - type 'help' for more information
(qemu) qom-set /objects/qt0 log qtest.log
Error: Insufficient permission to perform this operation

This implies it could work with "sufficient permission".  It can't.
Change the error message to:

Error: Property 'log' can not be set now

Same for property "chardev".

Signed-off-by: Markus Armbruster 
---
 softmmu/qtest.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/softmmu/qtest.c b/softmmu/qtest.c
index f8acef2628..afea7693d0 100644
--- a/softmmu/qtest.c
+++ b/softmmu/qtest.c
@@ -977,7 +977,7 @@ static void qtest_set_log(Object *obj, const char *value, 
Error **errp)
 QTest *q = QTEST(obj);
 
 if (qtest == q) {
-error_setg(errp, QERR_PERMISSION_DENIED);
+error_setg(errp, "Property 'log' can not be set now");
 } else {
 g_free(q->log);
 q->log = g_strdup(value);
@@ -997,7 +997,7 @@ static void qtest_set_chardev(Object *obj, const char 
*value, Error **errp)
 Chardev *chr;
 
 if (qtest == q) {
-error_setg(errp, QERR_PERMISSION_DENIED);
+error_setg(errp, "Property 'chardev' can not be set now");
 return;
 }
 
-- 
2.37.2

Re: [PATCH 0/3] iothread and irqfd support

2022-10-12 Thread Jinhao Fan


On 10/12/2022 10:39 PM, Klaus Jensen wrote:

I have been meaning to pick it up, but I got side-tracked. The polling
performance drop needs to be addressed as we discussed offline.

But the v4 looks pretty good and I can pick that up without the polling
support for now.


I've been using the v4 without polling for my daily work. It worked 
pretty well for my test workloads.


I'm not sure what needs to be done for the polling problem. I can try to 
add a completion batching mechanism with BH, similar to virtio-blk. Do 
you think this is the right direction?

Re: [PATCH] build: disable container-based cross compilers by default

2022-10-12 Thread Alex Bennée



Paolo Bonzini  writes:

> On 10/12/22 14:17, Alex Bennée wrote:
>>> Container-based cross compilers have some issues which were overlooked
>>> when they were only used for TCG tests, but are more visible since
>>> firmware builds try to use them:
>> We seem to have dropped our gating somewhere. Previously if a user did
>> not have docker or podman on their system none of the container stuff
>> would run.
>
> It's still there:
>
> container="no"
> if test $use_containers = "yes"; then
> case $($python "$source_path"/tests/docker/docker.py probe) in
> *docker) container=docker ;;
> podman) container=podman ;;
> no) container=no ;;
> esac
> if test "$container" != "no"; then
> docker_py="$python $source_path/tests/docker/docker.py --engine 
> $container"
> fi
> fi
>
> I think what's happening is that podman is there but there's no support
> for rootless containers, so "podman run" fails.

Ahh so we could improve our probe code then? I'm afraid I don't have
much personal testing coverage for podman stuff - I thought rootless
support was the main reason Fedora had transitioned to it.

>
> Paolo


-- 
Alex Bennée

Re: [PATCH v2 2/2] error handling: Use RETRY_ON_EINTR() macro where applicable

2022-10-12 Thread Bin Meng

Hi,

On Wed, Oct 12, 2022 at 8:32 PM Nikita Ivanov  wrote:
>
> There is a defined RETRY_ON_EINTR() macro in qemu/osdep.h which
> handles the same while loop.
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/415
>
> Signed-off-by: Nikita Ivanov 
> ---
>  block/file-posix.c| 37 -
>  chardev/char-pty.c|  4 +---
>  hw/9pfs/9p-local.c|  8 ++--
>  net/l2tpv3.c  | 17 +
>  net/socket.c  | 16 +++-
>  net/tap.c | 12 
>  qga/commands-posix.c  |  4 +---
>  semihosting/syscalls.c|  4 +---
>  tests/qtest/libqtest.c| 14 ++
>  tests/vhost-user-bridge.c |  4 +---
>  util/main-loop.c  |  4 +---
>  util/osdep.c  |  4 +---
>  util/vfio-helpers.c   | 12 ++--
>  13 files changed, 52 insertions(+), 88 deletions(-)
>

This patch has to be squashed into patch 1 for bisectability, as TFR
is already removed in patch 1.

Regards,
Bin

Re: [PATCH v2 2/2] error handling: Use RETRY_ON_EINTR() macro where applicable

2022-10-12 Thread Marc-André Lureau

Hi

On Wed, Oct 12, 2022 at 4:32 PM Nikita Ivanov 
wrote:

> There is a defined RETRY_ON_EINTR() macro in qemu/osdep.h which
> handles the same while loop.
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/415
>
> Signed-off-by: Nikita Ivanov 
>

Reviewed-by: Marc-André Lureau 



> ---
>  block/file-posix.c| 37 -
>  chardev/char-pty.c|  4 +---
>  hw/9pfs/9p-local.c|  8 ++--
>  net/l2tpv3.c  | 17 +
>  net/socket.c  | 16 +++-
>  net/tap.c | 12 
>  qga/commands-posix.c  |  4 +---
>  semihosting/syscalls.c|  4 +---
>  tests/qtest/libqtest.c| 14 ++
>  tests/vhost-user-bridge.c |  4 +---
>  util/main-loop.c  |  4 +---
>  util/osdep.c  |  4 +---
>  util/vfio-helpers.c   | 12 ++--
>  13 files changed, 52 insertions(+), 88 deletions(-)
>
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 66fdb07820..c589cb489b 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1238,9 +1238,7 @@ static int hdev_get_max_segments(int fd, struct stat
> *st)
>  ret = -errno;
>  goto out;
>  }
> -do {
> -ret = read(sysfd, buf, sizeof(buf) - 1);
> -} while (ret == -1 && errno == EINTR);
> +ret = RETRY_ON_EINTR(read(sysfd, buf, sizeof(buf) - 1));
>  if (ret < 0) {
>  ret = -errno;
>  goto out;
> @@ -1388,9 +1386,9 @@ static int handle_aiocb_ioctl(void *opaque)
>  RawPosixAIOData *aiocb = opaque;
>  int ret;
>
> -do {
> -ret = ioctl(aiocb->aio_fildes, aiocb->ioctl.cmd,
> aiocb->ioctl.buf);
> -} while (ret == -1 && errno == EINTR);
> +ret = RETRY_ON_EINTR(
> +ioctl(aiocb->aio_fildes, aiocb->ioctl.cmd, aiocb->ioctl.buf)
> +);
>  if (ret == -1) {
>  return -errno;
>  }
> @@ -1472,18 +1470,17 @@ static ssize_t
> handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
>  {
>  ssize_t len;
>
> -do {
> -if (aiocb->aio_type & QEMU_AIO_WRITE)
> -len = qemu_pwritev(aiocb->aio_fildes,
> -   aiocb->io.iov,
> -   aiocb->io.niov,
> -   aiocb->aio_offset);
> - else
> -len = qemu_preadv(aiocb->aio_fildes,
> -  aiocb->io.iov,
> -  aiocb->io.niov,
> -  aiocb->aio_offset);
> -} while (len == -1 && errno == EINTR);
> +len = RETRY_ON_EINTR(
> +(aiocb->aio_type & QEMU_AIO_WRITE) ?
> +qemu_pwritev(aiocb->aio_fildes,
> +   aiocb->io.iov,
> +   aiocb->io.niov,
> +   aiocb->aio_offset) :
> +qemu_preadv(aiocb->aio_fildes,
> +  aiocb->io.iov,
> +  aiocb->io.niov,
> +  aiocb->aio_offset)
> +);
>
>  if (len == -1) {
>  return -errno;
> @@ -1908,9 +1905,7 @@ static int allocate_first_block(int fd, size_t
> max_size)
>  buf = qemu_memalign(max_align, write_size);
>  memset(buf, 0, write_size);
>
> -do {
> -n = pwrite(fd, buf, write_size, 0);
> -} while (n == -1 && errno == EINTR);
> +n = RETRY_ON_EINTR(pwrite(fd, buf, write_size, 0));
>
>  ret = (n == -1) ? -errno : 0;
>
> diff --git a/chardev/char-pty.c b/chardev/char-pty.c
> index 53f25c6bbd..92fd33c854 100644
> --- a/chardev/char-pty.c
> +++ b/chardev/char-pty.c
> @@ -93,9 +93,7 @@ static void pty_chr_update_read_handler(Chardev *chr)
>  pfd.fd = fioc->fd;
>  pfd.events = G_IO_OUT;
>  pfd.revents = 0;
> -do {
> -rc = g_poll(&pfd, 1, 0);
> -} while (rc == -1 && errno == EINTR);
> +rc = RETRY_ON_EINTR(g_poll(&pfd, 1, 0));
>  assert(rc >= 0);
>
>  if (pfd.revents & G_IO_HUP) {
> diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
> index d42ce6d8b8..bb3187244f 100644
> --- a/hw/9pfs/9p-local.c
> +++ b/hw/9pfs/9p-local.c
> @@ -470,9 +470,7 @@ static ssize_t local_readlink(FsContext *fs_ctx,
> V9fsPath *fs_path,
>  if (fd == -1) {
>  return -1;
>  }
> -do {
> -tsize = read(fd, (void *)buf, bufsz);
> -} while (tsize == -1 && errno == EINTR);
> +tsize = RETRY_ON_EINTR(read(fd, (void *)buf, bufsz));
>  close_preserve_errno(fd);
>  } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) ||
> (fs_ctx->export_flags & V9FS_SM_NONE)) {
> @@ -908,9 +906,7 @@ static int local_symlink(FsContext *fs_ctx, const char
> *oldpath,
>  }
>  /* Write the oldpath (target) to the file. */
>  oldpath_size = strlen(oldpath);
> -do {
> -write_size = write(fd, (void *)oldpath, oldpath_size);
> -} while (write_size == -1 && errno == EINTR);
> +write_size = RETRY_ON_EINTR(write(fd, (void

Re: [PATCH v4 6/6] tests/unit: make test-io-channel-command work on win32

2022-10-12 Thread Daniel P . Berrangé

On Thu, Oct 06, 2022 at 03:36:57PM +0400, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> This has been tested under msys2 & windows 11. I haven't tried to make
> it work with other environments yet, but that should be enough to
> validate the channel-command implementation anyway.
> 
> Here are the changes:
> - drop tests/ from fifo/pipe path, to avoid directory issues
> - use g_find_program() to lookup the socat executable (otherwise we
> would need to change ChannelCommand to use G_SPAWN_SEARCH_PATH, and deal
> with missing socat differently)
> - skip the "echo" test when socat is missing as well
> 
> Signed-off-by: Marc-André Lureau 
> ---
>  tests/unit/test-io-channel-command.c | 37 ++--
>  1 file changed, 19 insertions(+), 18 deletions(-)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH v4 2/6] util: make do_send_recv work with partial send/recv

2022-10-12 Thread Daniel P . Berrangé

On Thu, Oct 06, 2022 at 03:36:53PM +0400, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> According to msdn documentation and Linux man pages, send() should try
> to send as much as possible in blocking mode, while recv() may return
> earlier with a smaller available amount, we should try to continue
> send/recv from there.
> 
> Signed-off-by: Marc-André Lureau 
> ---
>  util/iov.c | 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [RFC PATCH 1/4] docs/devel: add a maintainers section to development process

2022-10-12 Thread Stefan Hajnoczi

On Wed, Oct 12, 2022 at 01:11:49PM +0100, Alex Bennée wrote:
> We don't currently have a clear place in the documentation to describe
> the rolls and responsibilities of a maintainer. Lets create one so we

s/roll/role/ in the commit description and the patch.

> can. I've moved a few small bits out of other files to try and keep
> everything in one place.
> 
> Signed-off-by: Alex Bennée 
> ---
>  docs/devel/code-of-conduct.rst   |  2 +
>  docs/devel/index-process.rst |  1 +
>  docs/devel/maintainers.rst   | 84 
>  docs/devel/submitting-a-pull-request.rst | 12 ++--
>  4 files changed, 91 insertions(+), 8 deletions(-)
>  create mode 100644 docs/devel/maintainers.rst
> 
> diff --git a/docs/devel/code-of-conduct.rst b/docs/devel/code-of-conduct.rst
> index 195444d1b4..f734ed0317 100644
> --- a/docs/devel/code-of-conduct.rst
> +++ b/docs/devel/code-of-conduct.rst
> @@ -1,3 +1,5 @@
> +.. _code_of_conduct:
> +
>  Code of Conduct
>  ===
>  
> diff --git a/docs/devel/index-process.rst b/docs/devel/index-process.rst
> index d0d7a200fd..d50dd74c3e 100644
> --- a/docs/devel/index-process.rst
> +++ b/docs/devel/index-process.rst
> @@ -8,6 +8,7 @@ Notes about how to interact with the community and how and 
> where to submit patch
>  
> code-of-conduct
> conflict-resolution
> +   maintainers
> style
> submitting-a-patch
> trivial-patches
> diff --git a/docs/devel/maintainers.rst b/docs/devel/maintainers.rst
> new file mode 100644
> index 00..e3c7003bfa
> --- /dev/null
> +++ b/docs/devel/maintainers.rst
> @@ -0,0 +1,84 @@
> +.. _maintainers:
> +
> +The Roll of Maintainers
> +===
> +
> +Maintainers are a critical part of the projects contributor ecosystem.

project's

> +They come from a wide range of backgrounds from unpaid hobbyists
> +working in their spare time to employees who work on the project as
> +part of their job. Maintainer activities include:
> +
> +  - reviewing patches and suggesting changes
> +  - preparing pull requests for their subsystems
> +  - participating in other project activities
> +
> +They are also human and subject to the same pressures as everyone else
> +including overload and burn out. Like everyone else they are subject
> +to projects :ref:`code_of_conduct`.

to the project's

(Although "project's" can be dropped without changing the meaning.)

> +
> +The MAINTAINERS file
> +
> +
> +The `MAINTAINERS
> +`__
> +file contains the canonical list of who is a maintainer. The file
> +is machine readable so an appropriately configured git (see
> +:ref:`cc_the_relevant_maintainer`) can automatically Cc them on
> +patches that touch their area of code.
> +
> +The file also describes the status of the area of code to give an idea
> +of how actively that section is maintained.
> +
> +.. list-table:: Meaning of support status in MAINTAINERS
> +   :widths: 25 75
> +   :header-rows: 1
> +
> +   * - Status
> + - Meaning
> +   * - Supported
> + - Someone is actually paid to look after this.
> +   * - Maintained
> + - Someone actually looks after it.
> +   * - Odd Fixes
> + - It has a maintainer but they don't have time to do
> +   much other than throw the odd patch in.
> +   * - Orphan
> + - No current maintainer.
> +   * - Obsolete
> + - Old obsolete code, should use something else.
> +
> +Please bear in mind that even if someone is paid to support something
> +it does not mean they are paid to support you. This is open source and
> +the code comes with no warranty and the project makes no guarantees
> +about dealing with bugs or feature requests.
> +
> +Becoming a maintainer
> +-
> +
> +Maintainers are volunteers who put themselves forward to keep an eye
> +on an area of code. They are generally accepted by the community to
> +have a good understanding of the subsystem and able to make a positive
> +contribution to the project.
> +
> +The process is simple - simply send a patch to the list that updates
> +the ``MAINTAINERS`` file. Sometimes this is done as part of a larger
> +series when a new sub-system is being added to the code base. This can
> +also be done by a retiring maintainer who nominates their replacement
> +after discussion with other contributors.
> +
> +Once the patch is reviewed and merged the only other step is to make
> +sure your GPG key is signed.
> +
> +.. _maintainer_keys:
> +
> +Maintainer GPG Keys
> +~~~
> +
> +GPG is used to sign pull requests so they can be identified as really
> +coming from the maintainer. If your key is not already signed by
> +members of the QEMU community, you should make arrangements to attend
> +a `KeySigningParty `__ (for
> +example at KVM Forum) or make alternative arrangements to have your
> +key signed by an attendee. Key signing requires meeting

Re: [RFC PATCH 3/4] docs/devel: simplify the minimal checklist

2022-10-12 Thread Stefan Hajnoczi

On Wed, Oct 12, 2022 at 01:11:51PM +0100, Alex Bennée wrote:
> The bullet points are quite long and contain process tips. Move those
> bits of the bullet to the relevant sections and link to them. Use a
> table for nicer formatting of the checklist.
> 
> Signed-off-by: Alex Bennée 
> ---
>  docs/devel/submitting-a-patch.rst | 75 ---
>  roms/qboot|  2 +-
>  2 files changed, 50 insertions(+), 27 deletions(-)
> 
> diff --git a/docs/devel/submitting-a-patch.rst 
> b/docs/devel/submitting-a-patch.rst
> index fb1673e974..41771501bf 100644
> --- a/docs/devel/submitting-a-patch.rst
> +++ b/docs/devel/submitting-a-patch.rst
> @@ -12,25 +12,18 @@ be committed faster.
>  This page seems very long, so if you are only trying to post a quick
>  one-shot fix, the bare minimum we ask is that:
>  
> --  You **must** provide a Signed-off-by: line (this is a hard
> -   requirement because it's how you say "I'm legally okay to contribute
> -   this and happy for it to go into QEMU", modeled after the `Linux kernel
> -   
> `__
> -   policy.) ``git commit -s`` or ``git format-patch -s`` will add one.
> --  All contributions to QEMU must be **sent as patches** to the
> -   qemu-devel `mailing list 
> `__.
> -   Patch contributions should not be posted on the bug tracker, posted on
> -   forums, or externally hosted and linked to. (We have other mailing lists 
> too,
> -   but all patches must go to qemu-devel, possibly with a Cc: to another
> -   list.) ``git send-email`` (`step-by-step setup
> -   guide `__ and `hints and
> -   tips 
> `__)
> -   works best for delivering the patch without mangling it, but
> -   attachments can be used as a last resort on a first-time submission.
> --  You must read replies to your message, and be willing to act on them.
> -   Note, however, that maintainers are often willing to manually fix up
> -   first-time contributions, since there is a learning curve involved in
> -   making an ideal patch submission.
> +.. list-table:: Minimal Checklist for Patches
> +   :widths: 35 65
> +   :header-rows: 1
> +
> +   * - Check
> + - Reason
> +   * - Patches contain Signed-off-by: Author Name 
> + - States you are legally able to contribute the code. See 
> :ref:`patch_emails_must_include_a_signed_off_by_line`
> +   * - Sent as patch emails to ``qemu-devel@nongnu.org``
> + - The project uses an email list based workflow. See 
> :ref:`submitting_your_patches`
> +   * - Be prepared to respond to review comments
> + - Code that doesn't pass review will not get merged. See 
> :ref:`participating_in_code_review`
>  
>  You do not have to subscribe to post (list policy is to reply-to-all to
>  preserve CCs and keep non-subscribers in the loop on the threads they
> @@ -229,6 +222,19 @@ bisection doesn't land on a known-broken state.
>  Submitting your Patches
>  ---
>  
> +The QEMU project uses a public email based workflow for reviewing and
> +merging patches. As a result all contributions to QEMU must be **sent
> +as patches** to the qemu-devel `mailing list
> +`__. Patch
> +contributions should not be posted on the bug tracker, posted on
> +forums, or externally hosted and linked to. (We have other mailing
> +lists too, but all patches must go to qemu-devel, possibly with a Cc:
> +to another list.) ``git send-email`` (`step-by-step setup guide
> +`__ and `hints and tips
> +`__)
> +works best for delivering the patch without mangling it, but
> +attachments can be used as a last resort on a first-time submission.
> +
>  .. _if_you_cannot_send_patch_emails:
>  
>  If you cannot send patch emails
> @@ -314,10 +320,12 @@ git repository to fetch the original commit.
>  Patch emails must include a ``Signed-off-by:`` line
>  ~~~
>  
> -For more information see `SubmittingPatches 1.12
> -`__.
> -This is vital or we will not be able to apply your patch! Please use
> -your real name to sign a patch (not an alias or acronym).
> +Your patches **must** include a Signed-off-by: line. This is a hard
> +requirement because it's how you say "I'm legally okay to contribute
> +this and happy for it to go into QEMU". The process is modelled after
> +the `Linux kernel
>

Re: [RFC PATCH 0/4] docs/devel suggestions for discussion

2022-10-12 Thread Stefan Hajnoczi

On Wed, Oct 12, 2022 at 01:11:48PM +0100, Alex Bennée wrote:
> Hi,
> 
> This is an attempt to improve our processes documentation by:
> 
>  - adding an explicit section on maintainers
>  - reducing the up-front verbiage in patch submission
>  - emphasising the importance of respectful reviews
> 
> I'm sure the language could be improved further so I humbly submit
> this RFC for discussion.
> 
> Alex Bennée (4):
>   docs/devel: add a maintainers section to development process
>   docs/devel: make language a little less code centric
>   docs/devel: simplify the minimal checklist
>   docs/devel: try and improve the language around patch review
> 
>  docs/devel/code-of-conduct.rst   |   2 +
>  docs/devel/index-process.rst |   1 +
>  docs/devel/maintainers.rst   |  84 +++
>  docs/devel/submitting-a-patch.rst| 101 +++
>  docs/devel/submitting-a-pull-request.rst |  12 +--
>  roms/qboot   |   2 +-
>  6 files changed, 157 insertions(+), 45 deletions(-)
>  create mode 100644 docs/devel/maintainers.rst
> 
> -- 
> 2.34.1
> 

Modulo comments:

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature

Re: [PATCH 0/3] iothread and irqfd support

2022-10-12 Thread Klaus Jensen

On Okt 12 10:28, Stefan Hajnoczi wrote:
> On Fri, 26 Aug 2022 at 07:18, Jinhao Fan  wrote:
> >
> > This patch series adds support for using a separate iothread for NVMe
> > IO emulation, which brings the potential of applying polling. The
> > first two patches implements support for irqfd, which solves thread
> > safety problems for interrupt emulation outside the main loop thread.
> >
> > Jinhao Fan (3):
> >   hw/nvme: support irq(de)assertion with eventfd
> >   hw/nvme: use KVM irqfd when available
> >   hw/nvme: add iothread support
> 
> Hi,
> What is the status of this series?
> 

I have been meaning to pick it up, but I got side-tracked. The polling
performance drop needs to be addressed as we discussed offline.

But the v4 looks pretty good and I can pick that up without the polling
support for now.


signature.asc
Description: PGP signature

Re: [PULL 0/1] testing: revert pc-bios build patch

2022-10-12 Thread Stefan Hajnoczi

Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/7.2 for any 
user-visible changes.


signature.asc
Description: PGP signature

[PULL 15/16] tests/qtest: libqtest: Install signal handler via signal()

2022-10-12 Thread Thomas Huth

From: Bin Meng 

At present the code uses sigaction() to install signal handler with
a flag SA_RESETHAND. Such usage can be covered by the signal() API
that is a simplified interface to the general sigaction() facility.

Update to use signal() to install the signal handler, as it is
available on Windows which we are going to support.

Signed-off-by: Bin Meng 
Reviewed-by: Marc-André Lureau 
Message-Id: <20221006151927.2079583-11-bmeng...@gmail.com>
Signed-off-by: Thomas Huth 
---
 tests/qtest/libqtest.c | 14 +++---
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c
index 7b6152807b..b23eb3edc3 100644
--- a/tests/qtest/libqtest.c
+++ b/tests/qtest/libqtest.c
@@ -66,7 +66,7 @@ struct QTestState
 };
 
 static GHookList abrt_hooks;
-static struct sigaction sigact_old;
+static void (*sighandler_old)(int);
 
 static int qtest_query_target_endianness(QTestState *s);
 
@@ -179,20 +179,12 @@ static void sigabrt_handler(int signo)
 
 static void setup_sigabrt_handler(void)
 {
-struct sigaction sigact;
-
-/* Catch SIGABRT to clean up on g_assert() failure */
-sigact = (struct sigaction){
-.sa_handler = sigabrt_handler,
-.sa_flags = SA_RESETHAND,
-};
-sigemptyset(&sigact.sa_mask);
-sigaction(SIGABRT, &sigact, &sigact_old);
+sighandler_old = signal(SIGABRT, sigabrt_handler);
 }
 
 static void cleanup_sigabrt_handler(void)
 {
-sigaction(SIGABRT, &sigact_old, NULL);
+signal(SIGABRT, sighandler_old);
 }
 
 static bool hook_list_is_empty(GHookList *hook_list)
-- 
2.31.1

[PULL 16/16] tests/unit/test-image-locking: Fix handling of temporary files

2022-10-12 Thread Thomas Huth

test-image-locking leaves some temporary files around - clean
them up. While we're at it, test-image-locking is a unit test,
so it should not use "qtest.*" for temporary file names. Give
them better names instead, so that it is clear where the temporary
files come from.

Message-Id: <20221012085932.799221-1-th...@redhat.com>
Reviewed-by: Marc-André Lureau 
Signed-off-by: Thomas Huth 
---
 tests/unit/test-image-locking.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/unit/test-image-locking.c b/tests/unit/test-image-locking.c
index a47299c247..2624cec6a0 100644
--- a/tests/unit/test-image-locking.c
+++ b/tests/unit/test-image-locking.c
@@ -79,7 +79,7 @@ static void test_image_locking_basic(void)
 g_autofree char *img_path = NULL;
 uint64_t perm, shared_perm;
 
-int fd = g_file_open_tmp("qtest.XXXXXX", &img_path, NULL);
+int fd = g_file_open_tmp("qemu-tst-img-lock.XXXXXX", &img_path, NULL);
 assert(fd >= 0);
 
 perm = BLK_PERM_WRITE | BLK_PERM_CONSISTENT_READ;
@@ -120,7 +120,7 @@ static void test_set_perm_abort(void)
 g_autofree char *img_path = NULL;
 uint64_t perm, shared_perm;
 int r;
-int fd = g_file_open_tmp("qtest.XXXXXX", &img_path, NULL);
+int fd = g_file_open_tmp("qemu-tst-img-lock.XXXXXX", &img_path, NULL);
 assert(fd >= 0);
 
 perm = BLK_PERM_WRITE | BLK_PERM_CONSISTENT_READ;
@@ -140,6 +140,8 @@ static void test_set_perm_abort(void)
 check_locked_bytes(fd, perm, ~shared_perm);
 blk_unref(blk1);
 blk_unref(blk2);
+close(fd);
+unlink(img_path);
 }
 
 int main(int argc, char **argv)
-- 
2.31.1

[PULL 10/16] tests/x86: Add 'q35' machine type to drive_del-test

2022-10-12 Thread Thomas Huth

From: Michael Labiuk 

Configure pci bridge setting to run tests on 'q35' machine type.

Signed-off-by: Michael Labiuk 
Message-Id: <20220929223547.1429580-9-michael.lab...@virtuozzo.com>
Reviewed-by: Thomas Huth 
Signed-off-by: Thomas Huth 
---
 tests/qtest/drive_del-test.c | 107 +++
 1 file changed, 107 insertions(+)

diff --git a/tests/qtest/drive_del-test.c b/tests/qtest/drive_del-test.c
index 106c613f4f..9a750395a9 100644
--- a/tests/qtest/drive_del-test.c
+++ b/tests/qtest/drive_del-test.c
@@ -252,6 +252,27 @@ static void test_cli_device_del(void)
 qtest_quit(qts);
 }
 
+static void test_cli_device_del_q35(void)
+{
+QTestState *qts;
+
+/*
+ * -drive/-device and device_del.  Start with a drive used by a
+ * device that unplugs after reset.
+ */
+qts = qtest_initf("-drive if=none,id=drive0,file=null-co://,"
+  "file.read-zeroes=on,format=raw "
+  "-machine q35 -device pcie-root-port,id=p1 "
+  "-device pcie-pci-bridge,bus=p1,id=b1 "
+  "-device virtio-blk-%s,drive=drive0,bus=b1,id=dev0",
+  qvirtio_get_dev_type());
+
+device_del(qts, true);
+g_assert(!has_drive(qts));
+
+qtest_quit(qts);
+}
+
 static void test_empty_device_del(void)
 {
 QTestState *qts;
@@ -288,6 +309,43 @@ static void test_device_add_and_del(void)
 qtest_quit(qts);
 }
 
+static void device_add_q35(QTestState *qts)
+{
+g_autofree char *driver = g_strdup_printf("virtio-blk-%s",
+  qvirtio_get_dev_type());
+QDict *response =
+   qtest_qmp(qts, "{'execute': 'device_add',"
+  " 'arguments': {"
+  "   'driver': %s,"
+  "   'drive': 'drive0',"
+  "   'id': 'dev0',"
+  "   'bus': 'b1'"
+  "}}", driver);
+g_assert(response);
+g_assert(qdict_haskey(response, "return"));
+qobject_unref(response);
+}
+
+static void test_device_add_and_del_q35(void)
+{
+QTestState *qts;
+
+/*
+ * -drive/device_add and device_del.  Start with a drive used by a
+ * device that unplugs after reset.
+ */
+qts = qtest_initf("-machine q35 -device pcie-root-port,id=p1 "
+ "-device pcie-pci-bridge,bus=p1,id=b1 "
+ "-drive if=none,id=drive0,file=null-co://,"
+ "file.read-zeroes=on,format=raw");
+
+device_add_q35(qts);
+device_del(qts, true);
+g_assert(!has_drive(qts));
+
+qtest_quit(qts);
+}
+
 static void test_drive_add_device_add_and_del(void)
 {
 QTestState *qts;
@@ -312,6 +370,25 @@ static void test_drive_add_device_add_and_del(void)
 qtest_quit(qts);
 }
 
+static void test_drive_add_device_add_and_del_q35(void)
+{
+QTestState *qts;
+
+qts = qtest_init("-machine q35 -device pcie-root-port,id=p1 "
+ "-device pcie-pci-bridge,bus=p1,id=b1");
+
+/*
+ * drive_add/device_add and device_del.  The drive is used by a
+ * device that unplugs after reset.
+ */
+drive_add_with_media(qts);
+device_add_q35(qts);
+device_del(qts, true);
+g_assert(!has_drive(qts));
+
+qtest_quit(qts);
+}
+
 static void test_blockdev_add_device_add_and_del(void)
 {
 QTestState *qts;
@@ -336,6 +413,25 @@ static void test_blockdev_add_device_add_and_del(void)
 qtest_quit(qts);
 }
 
+static void test_blockdev_add_device_add_and_del_q35(void)
+{
+QTestState *qts;
+
+qts = qtest_init("-machine q35 -device pcie-root-port,id=p1 "
+ "-device pcie-pci-bridge,bus=p1,id=b1");
+
+/*
+ * blockdev_add/device_add and device_del. The drive is used by a
+ * device that unplugs after reset, but it doesn't go away.
+ */
+blockdev_add_with_media(qts);
+device_add_q35(qts);
+device_del(qts, true);
+g_assert(has_blockdev(qts));
+
+qtest_quit(qts);
+}
+
 int main(int argc, char **argv)
 {
 g_test_init(&argc, &argv, NULL);
@@ -357,6 +453,17 @@ int main(int argc, char **argv)
test_empty_device_del);
 qtest_add_func("/device_del/blockdev",
test_blockdev_add_device_add_and_del);
+
+if (qtest_has_machine("q35")) {
+qtest_add_func("/device_del/drive/cli_device_q35",
+   test_cli_device_del_q35);
+qtest_add_func("/device_del/drive/device_add_q35",
+   test_device_add_and_del_q35);
+qtest_add_func("/device_del/drive/drive_add_device_add_q35",
+   test_drive_add_device_add_and_del_q35);
+qtest_add_func("/device_del/blockdev_q35",
+   test_blockdev_add_device_add_and_del_q35);
+}
 }
 
 return g_test_run();
-- 
2.31.1

[PULL 14/16] tests/qtest: migration-test: Avoid using hardcoded /tmp

2022-10-12 Thread Thomas Huth

From: Bin Meng 

This case was written to use hardcoded /tmp directory for temporary
files. Update to use g_dir_make_tmp() for a portable implementation.

Signed-off-by: Bin Meng 
Reviewed-by: Marc-André Lureau 
Message-Id: <20221006151927.2079583-5-bmeng...@gmail.com>
Signed-off-by: Thomas Huth 
---
 tests/qtest/migration-test.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 0d153d6b5e..ef4427ff4d 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -102,7 +102,7 @@ static bool ufd_version_check(void)
 
 #endif
 
-static const char *tmpfs;
+static char *tmpfs;
 
 /* The boot file modifies memory area in [start_address, end_address)
  * repeatedly. It outputs a 'B' at a fixed rate while it's still running.
@@ -2451,10 +2451,10 @@ static bool kvm_dirty_ring_supported(void)
 
 int main(int argc, char **argv)
 {
-char template[] = "/tmp/migration-test-XXXXXX";
 const bool has_kvm = qtest_has_accel("kvm");
 const bool has_uffd = ufd_version_check();
 const char *arch = qtest_get_arch();
+g_autoptr(GError) err = NULL;
 int ret;
 
 g_test_init(&argc, &argv, NULL);
@@ -2479,9 +2479,10 @@ int main(int argc, char **argv)
 return g_test_run();
 }
 
-tmpfs = g_mkdtemp(template);
+tmpfs = g_dir_make_tmp("migration-test-XXXXXX", &err);
 if (!tmpfs) {
-g_test_message("g_mkdtemp on path (%s): %s", template, 
strerror(errno));
+g_test_message("g_dir_make_tmp on path (%s): %s", tmpfs,
+   err->message);
 }
 g_assert(tmpfs);
 
@@ -2612,6 +2613,7 @@ int main(int argc, char **argv)
 g_test_message("unable to rmdir: path (%s): %s",
tmpfs, strerror(errno));
 }
+g_free(tmpfs);
 
 return ret;
 }
-- 
2.31.1

[PULL 12/16] tests/avocado: Add missing require_netdev('user') checks

2022-10-12 Thread Thomas Huth

From: Peter Maydell 

Some avocado tests fail if QEMU was built without libslirp. Add
require_netdev('user') checks where necessary:

These tests try to ping 10.0.2.2 and expect it to succeed:
  boot_linux_console.py:BootLinuxConsole.test_arm_emcraft_sf2
  boot_linux_console.py:BootLinuxConsole.test_arm_orangepi_sd
  ppc_bamboo.py:BambooMachine.test_ppc_bamboo

These tests run a commandline that includes '-net user':
  machine_aspeed.py:AST2x00Machine.test_arm_ast2500_evb_builroot
  (and others that use the do_test_arm_aspeed_buidroot_start()
  or do_test_arm_aspeed_sdk_start() helper functions)

These changes seem to be sufficient for 'make check-avocado'
to not fail on a --disable-slirp build.

Signed-off-by: Peter Maydell 
Message-Id: <20221001195224.2453581-1-peter.mayd...@linaro.org>
Reviewed-by: Thomas Huth 
Reviewed-by: Alex Bennée 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Thomas Huth 
---
 tests/avocado/boot_linux_console.py | 4 
 tests/avocado/machine_aspeed.py | 3 +++
 tests/avocado/ppc_bamboo.py | 1 +
 3 files changed, 8 insertions(+)

diff --git a/tests/avocado/boot_linux_console.py 
b/tests/avocado/boot_linux_console.py
index f26e036ab5..ca9d09b0d7 100644
--- a/tests/avocado/boot_linux_console.py
+++ b/tests/avocado/boot_linux_console.py
@@ -381,6 +381,8 @@ def test_arm_emcraft_sf2(self):
 :avocado: tags=u-boot
 :avocado: tags=accel:tcg
 """
+self.require_netdev('user')
+
 uboot_url = ('https://raw.githubusercontent.com/'
  'Subbaraya-Sundeep/qemu-test-binaries/'
  'fe371d32e50ca682391e1e70ab98c2942aeffb01/u-boot')
@@ -779,6 +781,8 @@ def test_arm_orangepi_sd(self):
 :avocado: tags=machine:orangepi-pc
 :avocado: tags=device:sd
 """
+self.require_netdev('user')
+
 deb_url = ('https://apt.armbian.com/pool/main/l/'

'linux-5.10.16-sunxi/linux-image-current-sunxi_21.02.2_armhf.deb')
 deb_hash = '9fa84beda245cabf0b4fa84cf6eaa7738ead1da0'
diff --git a/tests/avocado/machine_aspeed.py b/tests/avocado/machine_aspeed.py
index 0f64eb636c..124649a24b 100644
--- a/tests/avocado/machine_aspeed.py
+++ b/tests/avocado/machine_aspeed.py
@@ -93,6 +93,8 @@ def test_arm_ast2500_romulus_openbmc_v2_9_0(self):
 self.do_test_arm_aspeed(image_path)
 
 def do_test_arm_aspeed_buidroot_start(self, image, cpu_id):
+self.require_netdev('user')
+
 self.vm.set_console()
 self.vm.add_args('-drive', 'file=' + image + ',if=mtd,format=raw',
  '-net', 'nic', '-net', 'user')
@@ -193,6 +195,7 @@ def wait_for_console_pattern(self, success_message, 
vm=None):
  vm=vm)
 
 def do_test_arm_aspeed_sdk_start(self, image, cpu_id):
+self.require_netdev('user')
 self.vm.set_console()
 self.vm.add_args('-drive', 'file=' + image + ',if=mtd,format=raw',
  '-net', 'nic', '-net', 'user')
diff --git a/tests/avocado/ppc_bamboo.py b/tests/avocado/ppc_bamboo.py
index 102ff252df..a81be3d608 100644
--- a/tests/avocado/ppc_bamboo.py
+++ b/tests/avocado/ppc_bamboo.py
@@ -23,6 +23,7 @@ def test_ppc_bamboo(self):
 :avocado: tags=accel:tcg
 """
 self.require_accelerator("tcg")
+self.require_netdev('user')
 tar_url = ('http://landley.net/aboriginal/downloads/binaries/'
'system-image-powerpc-440fp.tar.gz')
 tar_hash = '53e5f16414b195b82d2c70272f81c2eedb39bad9'
-- 
2.31.1

[PULL 13/16] qtest: start a VNC test

2022-10-12 Thread Thomas Huth

From: Marc-André Lureau 

This is one of the simplest tests we could perform: it simply connects
to the VNC server via passed-in socket FDs and checks the connection can
be established.

Another series will make this test work on Windows as well.

As always, more tests can be added later! :)

Signed-off-by: Marc-André Lureau 
Message-Id: <20221006130513.2683873-1-marcandre.lur...@redhat.com>
Signed-off-by: Thomas Huth 
---
 tests/qtest/vnc-display-test.c | 103 +
 tests/qtest/meson.build|   8 ++-
 2 files changed, 110 insertions(+), 1 deletion(-)
 create mode 100644 tests/qtest/vnc-display-test.c

diff --git a/tests/qtest/vnc-display-test.c b/tests/qtest/vnc-display-test.c
new file mode 100644
index 0000000000..e2a9d682bb
--- /dev/null
+++ b/tests/qtest/vnc-display-test.c
@@ -0,0 +1,103 @@
+/*
+ * VNC display tests
+ *
+ * Copyright (c) 2022 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/sockets.h"
+#include "libqtest.h"
+#include <gio/gio.h>
+#include <gvnc.h>
+
+typedef struct Test {
+QTestState *qts;
+VncConnection *conn;
+GMainLoop *loop;
+} Test;
+
+static void on_vnc_error(VncConnection* self,
+ const char* msg)
+{
+g_error("vnc-error: %s", msg);
+}
+
+static void on_vnc_auth_failure(VncConnection *self,
+const char *msg)
+{
+g_error("vnc-auth-failure: %s", msg);
+}
+
+static bool
+test_setup(Test *test)
+{
+#ifdef WIN32
+g_test_skip("Not supported on Windows yet");
+return false;
+#else
+int pair[2];
+
+test->qts = qtest_init("-vnc none -name vnc-test");
+
+g_assert_cmpint(qemu_socketpair(AF_UNIX, SOCK_STREAM, 0, pair), ==, 0);
+
+qtest_qmp_add_client(test->qts, "vnc", pair[1]);
+
+test->conn = vnc_connection_new();
+g_signal_connect(test->conn, "vnc-error",
+ G_CALLBACK(on_vnc_error), NULL);
+g_signal_connect(test->conn, "vnc-auth-failure",
+ G_CALLBACK(on_vnc_auth_failure), NULL);
+vnc_connection_set_auth_type(test->conn, VNC_CONNECTION_AUTH_NONE);
+vnc_connection_open_fd(test->conn, pair[0]);
+
+test->loop = g_main_loop_new(NULL, FALSE);
+return true;
+#endif
+}
+
+static void
+test_vnc_basic_on_vnc_initialized(VncConnection *self,
+ Test *test)
+{
+const char *name = vnc_connection_get_name(test->conn);
+
+g_assert_cmpstr(name, ==, "QEMU (vnc-test)");
+g_main_loop_quit(test->loop);
+}
+
+static void
+test_vnc_basic(void)
+{
+Test test;
+
+if (!test_setup(&test)) {
+return;
+}
+
+g_signal_connect(test.conn, "vnc-initialized",
+ G_CALLBACK(test_vnc_basic_on_vnc_initialized), &test);
+
+g_main_loop_run(test.loop);
+
+qtest_quit(test.qts);
+g_object_unref(test.conn);
+g_main_loop_unref(test.loop);
+}
+
+int
+main(int argc, char **argv)
+{
+if (getenv("GTK_VNC_DEBUG")) {
+vnc_util_set_debug(true);
+}
+
+g_test_init(&argc, &argv, NULL);
+
+qtest_add_func("/vnc-display/basic", test_vnc_basic);
+
+return g_test_run();
+}
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 455f1bbb7e..c07a5b1a5f 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -306,8 +306,14 @@ qtests = {
   'vmgenid-test': files('boot-sector.c', 'acpi-utils.c'),
 }
 
+gvnc = dependency('gvnc-1.0', required: false)
+if gvnc.found()
+  qtests += {'vnc-display-test': [gvnc]}
+  qtests_generic += [ 'vnc-display-test' ]
+endif
+
 if dbus_display
-qtests += {'dbus-display-test': [dbus_display1, gio]}
+  qtests += {'dbus-display-test': [dbus_display1, gio]}
 endif
 
 qtest_executables = {}
-- 
2.31.1

[PULL 11/16] tests/x86: Add 'q35' machine type to ivshmem-test

2022-10-12 Thread Thomas Huth

From: Michael Labiuk 

Configure pci bridge setting to test ivshmem on 'q35'.

Signed-off-by: Michael Labiuk 
Message-Id: <20220929223547.1429580-10-michael.lab...@virtuozzo.com>
Reviewed-by: Thomas Huth 
Signed-off-by: Thomas Huth 
---
 tests/qtest/ivshmem-test.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/tests/qtest/ivshmem-test.c b/tests/qtest/ivshmem-test.c
index 9611d05eb5..cd550c8935 100644
--- a/tests/qtest/ivshmem-test.c
+++ b/tests/qtest/ivshmem-test.c
@@ -378,6 +378,20 @@ static void test_ivshmem_server(void)
 close(thread.pipe[0]);
 }
 
+static void test_ivshmem_hotplug_q35(void)
+{
+QTestState *qts = qtest_init("-object memory-backend-ram,size=1M,id=mb1 "
+ "-device pcie-root-port,id=p1 "
+ "-device pcie-pci-bridge,bus=p1,id=b1 "
+ "-machine q35");
+
+qtest_qmp_device_add(qts, "ivshmem-plain", "iv1",
+ "{'memdev': 'mb1', 'bus': 'b1'}");
+qtest_qmp_device_del_send(qts, "iv1");
+
+qtest_quit(qts);
+}
+
 #define PCI_SLOT_HP 0x06
 
 static void test_ivshmem_hotplug(void)
@@ -469,6 +483,7 @@ int main(int argc, char **argv)
 {
 int ret, fd;
 gchar dir[] = "/tmp/ivshmem-test.XXXXXX";
+const char *arch = qtest_get_arch();
 
 g_test_init(&argc, &argv, NULL);
 
@@ -494,6 +509,9 @@ int main(int argc, char **argv)
 qtest_add_func("/ivshmem/pair", test_ivshmem_pair);
 qtest_add_func("/ivshmem/server", test_ivshmem_server);
 }
+if (!strcmp(arch, "x86_64") && qtest_has_machine("q35")) {
+qtest_add_func("/ivshmem/hotplug-q35", test_ivshmem_hotplug_q35);
+}
 
 out:
 ret = g_test_run();
-- 
2.31.1

[PULL 02/16] tests/migration: remove the unused local variable

2022-10-12 Thread Thomas Huth

From: dinglimin 

Remove the unused local variable "records".

Signed-off-by: dinglimin 
Reviewed-by: Ján Tomko 
Message-Id: <20220928080555.2263-1-dingli...@cmss.chinamobile.com>
Signed-off-by: Thomas Huth 
---
 tests/migration/guestperf/engine.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/migration/guestperf/engine.py 
b/tests/migration/guestperf/engine.py
index 87a6ab2009..59fca2c70b 100644
--- a/tests/migration/guestperf/engine.py
+++ b/tests/migration/guestperf/engine.py
@@ -65,7 +65,6 @@ def _vcpu_timing(self, pid, tid_list):
 return records
 
 def _cpu_timing(self, pid):
-records = []
 now = time.time()
 
 jiffies_per_sec = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
-- 
2.31.1

[PULL 08/16] tests/x86: Fix comment typo in drive_del-test

2022-10-12 Thread Thomas Huth

From: Michael Labiuk 

Signed-off-by: Michael Labiuk 
Message-Id: <20220929223547.1429580-7-michael.lab...@virtuozzo.com>
Reviewed-by: Thomas Huth 
Signed-off-by: Thomas Huth 
---
 tests/qtest/drive_del-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qtest/drive_del-test.c b/tests/qtest/drive_del-test.c
index 467e752b0d..44b9578801 100644
--- a/tests/qtest/drive_del-test.c
+++ b/tests/qtest/drive_del-test.c
@@ -327,7 +327,7 @@ static void test_blockdev_add_device_add_and_del(void)
 qts = qtest_init(machine_addition);
 
 /*
- * blockdev_add/device_add and device_del.  The it drive is used by a
+ * blockdev_add/device_add and device_del. The drive is used by a
  * device that unplugs after reset, but it doesn't go away.
  */
 blockdev_add_with_media(qts);
-- 
2.31.1

[PULL 07/16] tests/x86: Add 'q35' machine type to hotplug hd-geo-test

2022-10-12 Thread Thomas Huth

From: Michael Labiuk 

Add pci bridge setting to test hotplug.
Duplicate tests for plugging scsi and virtio devices for q35 machine type.

Signed-off-by: Michael Labiuk 
Message-Id: <20220929223547.1429580-6-michael.lab...@virtuozzo.com>
Signed-off-by: Thomas Huth 
---
 tests/qtest/hd-geo-test.c | 76 ++-
 1 file changed, 75 insertions(+), 1 deletion(-)

diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c
index 278464c379..4a7628077b 100644
--- a/tests/qtest/hd-geo-test.c
+++ b/tests/qtest/hd-geo-test.c
@@ -963,6 +963,42 @@ static void test_override_scsi_hot_unplug(void)
 test_override_hot_unplug(args, "scsi-disk0", expected, expected2);
 }
 
+static void test_override_scsi_hot_unplug_q35(void)
+{
+TestArgs *args = create_args();
+CHSResult expected[] = {
+{
+"/pci@i0cf8/pci-bridge@1/pci-bridge@0/scsi@2/channel@0/disk@0,0",
+{1, 120, 30}
+},
+{
+"/pci@i0cf8/pci-bridge@1/pci-bridge@0/scsi@2/channel@0/disk@1,0",
+{20, 20, 20}
+},
+{NULL, {0, 0, 0} }
+};
+CHSResult expected2[] = {
+{
+"/pci@i0cf8/pci-bridge@1/pci-bridge@0/scsi@2/channel@0/disk@1,0",
+{20, 20, 20}
+},
+{NULL, {0, 0, 0} }
+};
+
+args->argc = append_arg(args->argc, args->argv, ARGV_SIZE,
+g_strdup("-device pcie-root-port,id=p0 "
+ "-device pcie-pci-bridge,bus=p0,id=b1 "
+ "-machine q35"));
+
+add_drive_with_mbr(args, empty_mbr, 1);
+add_drive_with_mbr(args, empty_mbr, 1);
+add_scsi_controller(args, "virtio-scsi-pci", "b1", 2);
+add_scsi_disk(args, 0, 0, 0, 0, 0, 1, 120, 30);
+add_scsi_disk(args, 1, 0, 0, 1, 0, 20, 20, 20);
+
+test_override_hot_unplug(args, "scsi-disk0", expected, expected2);
+}
+
 static void test_override_virtio_hot_unplug(void)
 {
 TestArgs *args = create_args();
@@ -986,6 +1022,41 @@ static void test_override_virtio_hot_unplug(void)
 test_override_hot_unplug(args, "virtio-disk0", expected, expected2);
 }
 
+static void test_override_virtio_hot_unplug_q35(void)
+{
+TestArgs *args = create_args();
+CHSResult expected[] = {
+{
+"/pci@i0cf8/pci-bridge@1/pci-bridge@0/scsi@2/disk@0,0",
+{1, 120, 30}
+},
+{
+"/pci@i0cf8/pci-bridge@1/pci-bridge@0/scsi@3/disk@0,0",
+{20, 20, 20}
+},
+{NULL, {0, 0, 0} }
+};
+CHSResult expected2[] = {
+{
+"/pci@i0cf8/pci-bridge@1/pci-bridge@0/scsi@3/disk@0,0",
+{20, 20, 20}
+},
+{NULL, {0, 0, 0} }
+};
+
+args->argc = append_arg(args->argc, args->argv, ARGV_SIZE,
+g_strdup("-device pcie-root-port,id=p0 "
+ "-device pcie-pci-bridge,bus=p0,id=b1 "
+ "-machine q35"));
+
+add_drive_with_mbr(args, empty_mbr, 1);
+add_drive_with_mbr(args, empty_mbr, 1);
+add_virtio_disk(args, 0, "b1", 2, 1, 120, 30);
+add_virtio_disk(args, 1, "b1", 3, 20, 20, 20);
+
+test_override_hot_unplug(args, "virtio-disk0", expected, expected2);
+}
+
 int main(int argc, char **argv)
 {
 Backend i;
@@ -1035,11 +1106,14 @@ int main(int argc, char **argv)
test_override_virtio_blk_q35);
 qtest_add_func("hd-geo/override/zero_chs_q35",
test_override_zero_chs_q35);
-
 if (qtest_has_device("lsi53c895a")) {
 qtest_add_func("hd-geo/override/scsi_q35",
test_override_scsi_q35);
 }
+qtest_add_func("hd-geo/override/scsi_hot_unplug_q35",
+   test_override_scsi_hot_unplug_q35);
+qtest_add_func("hd-geo/override/virtio_hot_unplug_q35",
+   test_override_virtio_hot_unplug_q35);
 }
 } else {
 g_test_message("QTEST_QEMU_IMG not set or qemu-img missing; "
-- 
2.31.1

[PULL 01/16] qtest: "-display none" is set in qtest_init()

2022-10-12 Thread Thomas Huth

From: Juan Quintela 

So we don't need to set it anywhere else.

Signed-off-by: Juan Quintela 
[thuth: Drop changes in tests/qtest/fuzz/ since the fuzzers still need this]
Message-Id: <20220902165126.1482-2-quint...@redhat.com>
Signed-off-by: Thomas Huth 
---
 tests/qtest/bios-tables-test.c  | 2 +-
 tests/qtest/fuzz-lsi53c895a-test.c  | 2 +-
 tests/qtest/fuzz-megasas-test.c | 2 +-
 tests/qtest/fuzz-sb16-test.c| 6 +++---
 tests/qtest/fuzz-sdcard-test.c  | 6 +++---
 tests/qtest/fuzz-virtio-scsi-test.c | 2 +-
 tests/qtest/fuzz-xlnx-dp-test.c | 2 +-
 7 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
index 2ebeb530b2..e6096e7f73 100644
--- a/tests/qtest/bios-tables-test.c
+++ b/tests/qtest/bios-tables-test.c
@@ -725,7 +725,7 @@ static char *test_acpi_create_args(test_data *data, const 
char *params,
 }
 } else {
 args = g_strdup_printf("-machine %s %s -accel tcg "
-"-net none -display none %s "
+"-net none %s "
 "-drive id=hd0,if=none,file=%s,format=raw "
 "-device %s,drive=hd0 ",
  data->machine, data->tcg_only ? "" : "-accel kvm",
diff --git a/tests/qtest/fuzz-lsi53c895a-test.c 
b/tests/qtest/fuzz-lsi53c895a-test.c
index 434c16bf42..392a7ae7ed 100644
--- a/tests/qtest/fuzz-lsi53c895a-test.c
+++ b/tests/qtest/fuzz-lsi53c895a-test.c
@@ -21,7 +21,7 @@ static void test_lsi_do_msgout_cancel_req(void)
 return;
 }
 
-s = qtest_init("-M q35 -m 2G -display none -nodefaults "
+s = qtest_init("-M q35 -m 2G -nodefaults "
"-device lsi53c895a,id=scsi "
"-device scsi-hd,drive=disk0 "
"-drive file=null-co://,id=disk0,if=none,format=raw");
diff --git a/tests/qtest/fuzz-megasas-test.c b/tests/qtest/fuzz-megasas-test.c
index 287fe19fc7..8d7ed3723a 100644
--- a/tests/qtest/fuzz-megasas-test.c
+++ b/tests/qtest/fuzz-megasas-test.c
@@ -40,7 +40,7 @@ static void test_lp1878263_megasas_zero_iov_cnt(void)
  */
 static void test_gitlab_issue521_megasas_sgl_ovf(void)
 {
-QTestState *s = qtest_init("-display none -m 32M -machine q35 "
+QTestState *s = qtest_init("-m 32M -machine q35 "
"-nodefaults -device megasas "
"-device scsi-cd,drive=null0 "
"-blockdev "
diff --git a/tests/qtest/fuzz-sb16-test.c b/tests/qtest/fuzz-sb16-test.c
index add2a2ad39..fc445b1871 100644
--- a/tests/qtest/fuzz-sb16-test.c
+++ b/tests/qtest/fuzz-sb16-test.c
@@ -15,7 +15,7 @@
  */
 static void test_fuzz_sb16_0x1c(void)
 {
-QTestState *s = qtest_init("-M q35 -display none "
+QTestState *s = qtest_init("-M q35 "
"-device sb16,audiodev=snd0 "
"-audiodev none,id=snd0");
 qtest_outw(s, 0x22c, 0x41);
@@ -27,7 +27,7 @@ static void test_fuzz_sb16_0x1c(void)
 
 static void test_fuzz_sb16_0x91(void)
 {
-QTestState *s = qtest_init("-M pc -display none "
+QTestState *s = qtest_init("-M pc "
"-device sb16,audiodev=none "
"-audiodev id=none,driver=none");
 qtest_outw(s, 0x22c, 0xf141);
@@ -43,7 +43,7 @@ static void test_fuzz_sb16_0x91(void)
  */
 static void test_fuzz_sb16_0xd4(void)
 {
-QTestState *s = qtest_init("-M pc -display none "
+QTestState *s = qtest_init("-M pc "
"-device sb16,audiodev=none "
"-audiodev id=none,driver=none");
 qtest_outb(s, 0x22c, 0x41);
diff --git a/tests/qtest/fuzz-sdcard-test.c b/tests/qtest/fuzz-sdcard-test.c
index e7fd818148..cd134cdf55 100644
--- a/tests/qtest/fuzz-sdcard-test.c
+++ b/tests/qtest/fuzz-sdcard-test.c
@@ -18,7 +18,7 @@ static void oss_fuzz_29225(void)
 {
 QTestState *s;
 
-s = qtest_init(" -display none -m 512m -nodefaults -nographic"
+s = qtest_init(" -m 512m -nodefaults -nographic"
" -device sdhci-pci,sd-spec-version=3"
" -device sd-card,drive=d0"
" -drive if=none,index=0,file=null-co://,format=raw,id=d0");
@@ -61,7 +61,7 @@ static void oss_fuzz_36217(void)
 {
 QTestState *s;
 
-s = qtest_init(" -display none -m 32 -nodefaults -nographic"
+s = qtest_init(" -m 32 -nodefaults -nographic"
" -device sdhci-pci,sd-spec-version=3 "
"-device sd-card,drive=d0 "
"-drive if=none,index=0,file=null-co://,format=raw,id=d0");
@@ -95,7 +95,7 @@ static void oss_fuzz_36391(void)
 {
 QTestState *s;
 
-s = qtest_init(" -display none -m 512M -nodefaults -nographic"
+s = qtest_init(" -m 512M -nodefaults -nographic"
" -device sdhci-pci,sd-spec-version=3"
" -device sd-card,drive=drv"
" -drive 
if=none,index=0,file=null-co://,format=raw,id=drv");
diff --git

1 2 >

1 - 100 of 181 matches

Mail list logo