RE: [PATCH 3/9] hw/block/nvme: support per-namespace smart log

2020-10-05 Thread Dmitry Fomichev
> -Original Message-
> From: Keith Busch 
> Sent: Wednesday, September 30, 2020 6:04 PM
> To: qemu-block@nongnu.org; qemu-de...@nongnu.org; Klaus Jensen
> 
> Cc: Niklas Cassel ; Dmitry Fomichev
> ; Kevin Wolf ; Philippe
> Mathieu-Daudé ; Keith Busch 
> Subject: [PATCH 3/9] hw/block/nvme: support per-namespace smart log
> 
> Let the user specify a specific namespace if they want to get access
> stats for a specific namespace.
> 
> Signed-off-by: Keith Busch 
> ---
>  hw/block/nvme.c  | 66 +++-
>  include/block/nvme.h |  1 +
>  2 files changed, 41 insertions(+), 26 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 8d2b5be567..41389b2b09 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -1164,48 +1164,62 @@ static uint16_t nvme_create_sq(NvmeCtrl *n,
> NvmeRequest *req)
>  return NVME_SUCCESS;
>  }
> 
> +struct nvme_stats {
> +uint64_t units_read;
> +uint64_t units_written;
> +uint64_t read_commands;
> +uint64_t write_commands;
> +};
> +
> +static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats
> *stats)
> +{
> +BlockAcctStats *s = blk_get_stats(ns->blkconf.blk);
> +
> +stats->units_read += s->nr_bytes[BLOCK_ACCT_READ] >>
> BDRV_SECTOR_BITS;
> +stats->units_written += s->nr_bytes[BLOCK_ACCT_WRITE] >>
> BDRV_SECTOR_BITS;
> +stats->read_commands += s->nr_ops[BLOCK_ACCT_READ];
> +stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE];
> +}
> +
>  static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t
> buf_len,
>  uint64_t off, NvmeRequest *req)
>  {
>  uint32_t nsid = le32_to_cpu(req->cmd.nsid);
> -
> +struct nvme_stats stats = { 0 };
> +NvmeSmartLog smart = { 0 };
>  uint32_t trans_len;
> +NvmeNamespace *ns;
>  time_t current_ms;
> -uint64_t units_read = 0, units_written = 0;
> -uint64_t read_commands = 0, write_commands = 0;
> -NvmeSmartLog smart;
> -
> -if (nsid && nsid != 0x) {
> -return NVME_INVALID_FIELD | NVME_DNR;
> -}
> 
>  if (off >= sizeof(smart)) {
>  return NVME_INVALID_FIELD | NVME_DNR;
>  }
> 
> -for (int i = 1; i <= n->num_namespaces; i++) {
> -NvmeNamespace *ns = nvme_ns(n, i);
> -if (!ns) {
> -continue;
> -}
> -
> -BlockAcctStats *s = blk_get_stats(ns->blkconf.blk);
> +if (nsid != 0x) {
> +ns = nvme_ns(n, nsid);
> +if (!ns)
> +return NVME_INVALID_NSID | NVME_DNR;

checkpatch will complain about missing curly braces here

> +nvme_set_blk_stats(ns, );
> +} else {
> +int i;
> 
> -units_read += s->nr_bytes[BLOCK_ACCT_READ] >>
> BDRV_SECTOR_BITS;
> -units_written += s->nr_bytes[BLOCK_ACCT_WRITE] >>
> BDRV_SECTOR_BITS;
> -read_commands += s->nr_ops[BLOCK_ACCT_READ];
> -write_commands += s->nr_ops[BLOCK_ACCT_WRITE];
> +for (i = 1; i <= n->num_namespaces; i++) {
> +ns = nvme_ns(n, i);
> +if (!ns) {
> +continue;
> +}
> +nvme_set_blk_stats(ns, );
> +}
>  }
> 
>  trans_len = MIN(sizeof(smart) - off, buf_len);
> 
> -memset(, 0x0, sizeof(smart));
> -
> -smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(units_read,
> 1000));
> -smart.data_units_written[0] =
> cpu_to_le64(DIV_ROUND_UP(units_written,
> +smart.data_units_read[0] =
> cpu_to_le64(DIV_ROUND_UP(stats.units_read,
> +1000));
> +smart.data_units_written[0] =
> cpu_to_le64(DIV_ROUND_UP(stats.units_written,
> 1000));
> -smart.host_read_commands[0] = cpu_to_le64(read_commands);
> -smart.host_write_commands[0] = cpu_to_le64(write_commands);
> +smart.host_read_commands[0] = cpu_to_le64(stats.read_commands);
> +smart.host_write_commands[0] = cpu_to_le64(stats.write_commands);
> 
>  smart.temperature = cpu_to_le16(n->temperature);
> 
> @@ -2708,7 +2722,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice
> *pci_dev)
>  id->acl = 3;
>  id->aerl = n->params.aerl;
>  id->frmw = (NVME_NUM_FW_SLOTS << 1) | NVME_FRMW_SLOT1_RO;
> -id->lpa = NVME_LPA_EXTENDED;
> +id->lpa = NVME_LPA_NS_SMART | NVME_LPA_EXTENDED;
> 
>  /* recommended default value (~70 C) */
>  id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING);
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 58647bcdad..868cf53f0b 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -849,6 +849,7 @@ enum NvmeIdCtrlFrmw {
>  };
> 
>  enum NvmeIdCtrlLpa {
> +NVME_LPA_NS_SMART = 1 << 0,
>  NVME_LPA_EXTENDED = 1 << 2,
>  };
> 
> --
> 2.24.1




RE: [PATCH 1/9] hw/block/nvme: remove pointless rw indirection

2020-10-05 Thread Dmitry Fomichev
> -Original Message-
> From: Keith Busch 
> Sent: Wednesday, September 30, 2020 6:04 PM
> To: qemu-block@nongnu.org; qemu-de...@nongnu.org; Klaus Jensen
> 
> Cc: Niklas Cassel ; Dmitry Fomichev
> ; Kevin Wolf ; Philippe
> Mathieu-Daudé ; Keith Busch 
> Subject: [PATCH 1/9] hw/block/nvme: remove pointless rw indirection
> 
> The code switches on the opcode to invoke a function specific to that
> opcode. There's no point in consolidating back to a common function that
> just switches on that same opcode without any actual common code.
> Restore the opcode specific behavior without going back through another
> level of switches.
> 
> Signed-off-by: Keith Busch 
> ---
>  hw/block/nvme.c | 91 -
>  1 file changed, 29 insertions(+), 62 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index da8344f196..db52ea0db9 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -927,68 +927,12 @@ static void nvme_rw_cb(void *opaque, int ret)
>  nvme_enqueue_req_completion(nvme_cq(req), req);
>  }
> 
> -static uint16_t nvme_do_aio(BlockBackend *blk, int64_t offset, size_t len,
> -NvmeRequest *req)
> -{
> -BlockAcctCookie *acct = >acct;
> -BlockAcctStats *stats = blk_get_stats(blk);
> -
> -bool is_write = false;
> -
> -trace_pci_nvme_do_aio(nvme_cid(req), req->cmd.opcode,
> -  nvme_io_opc_str(req->cmd.opcode), blk_name(blk),
> -  offset, len);
> -
> -switch (req->cmd.opcode) {
> -case NVME_CMD_FLUSH:
> -block_acct_start(stats, acct, 0, BLOCK_ACCT_FLUSH);
> -req->aiocb = blk_aio_flush(blk, nvme_rw_cb, req);
> -break;
> -
> -case NVME_CMD_WRITE_ZEROES:
> -block_acct_start(stats, acct, len, BLOCK_ACCT_WRITE);
> -req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len,
> -   BDRV_REQ_MAY_UNMAP, nvme_rw_cb,
> -   req);
> -break;
> -
> -case NVME_CMD_WRITE:
> -is_write = true;
> -
> -/* fallthrough */
> -
> -case NVME_CMD_READ:
> -block_acct_start(stats, acct, len,
> - is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
> -
> -if (req->qsg.sg) {
> -if (is_write) {
> -req->aiocb = dma_blk_write(blk, >qsg, offset,
> -   BDRV_SECTOR_SIZE, nvme_rw_cb, 
> req);
> -} else {
> -req->aiocb = dma_blk_read(blk, >qsg, offset,
> -  BDRV_SECTOR_SIZE, nvme_rw_cb, req);
> -}
> -} else {
> -if (is_write) {
> -req->aiocb = blk_aio_pwritev(blk, offset, >iov, 0,
> - nvme_rw_cb, req);
> -} else {
> -req->aiocb = blk_aio_preadv(blk, offset, >iov, 0,
> -nvme_rw_cb, req);
> -}
> -}
> -
> -break;
> -}
> -
> -return NVME_NO_COMPLETE;
> -}
> -
>  static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
>  {
> -NvmeNamespace *ns = req->ns;
> -return nvme_do_aio(ns->blkconf.blk, 0, 0, req);
> +block_acct_start(blk_get_stats(n->conf.blk), >acct, 0,

This should be
block_acct_start(blk_get_stats(req->ns->blkconf.blk), >acct, 0,

> + BLOCK_ACCT_FLUSH);
> +req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req);

and here
req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_rw_cb, req);

> +return NVME_NO_COMPLETE;
>  }
> 
>  static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
> @@ -1009,7 +953,11 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n,
> NvmeRequest *req)
>  return status;
>  }
> 
> -return nvme_do_aio(ns->blkconf.blk, offset, count, req);
> +block_acct_start(blk_get_stats(n->conf.blk), >acct, 0,

 block_acct_start(blk_get_stats(ns->blkconf.blk), >acct, 0,

> + BLOCK_ACCT_WRITE);
> +req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count,

Here as well,
 req->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, offset, count,

> +   BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req);
> +return NVME_NO_COMPLETE;
>  }
> 
>  static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
> @@ -1023,6 +971,7 @@ static uint16_t nvme_rw(NvmeCtrl *n,
> NvmeRequest *req)
>  uint64_t data_offset = nvme_l2b(ns, slba);
>  enum BlockAcctType acct = req->cmd.opcode == NVME_CMD_WRITE ?
>  BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
> +BlockBackend *blk = ns->blkconf.blk;
>  uint16_t status;
> 
>  trace_pci_nvme_rw(nvme_cid(req), nvme_io_opc_str(rw->opcode),
> @@ -1045,7 +994,25 @@ static uint16_t nvme_rw(NvmeCtrl *n,
> NvmeRequest *req)
>  goto invalid;
>  }
> 
> -return nvme_do_aio(ns->blkconf.blk, data_offset, data_size, 

RE: [PATCH v5 09/14] hw/block/nvme: Support Zoned Namespace Command Set

2020-10-05 Thread Dmitry Fomichev
> -Original Message-
> From: Niklas Cassel 
> Sent: Monday, October 5, 2020 7:41 AM
> To: Dmitry Fomichev 
> Cc: Alistair Francis ; qemu-de...@nongnu.org;
> Damien Le Moal ; f...@euphon.net; Matias
> Bjorling ; qemu-block@nongnu.org;
> kw...@redhat.com; mlevi...@redhat.com; k.jen...@samsung.com;
> kbu...@kernel.org; phi...@redhat.com
> Subject: Re: [PATCH v5 09/14] hw/block/nvme: Support Zoned Namespace
> Command Set
> 
> On Sun, Oct 04, 2020 at 11:57:07PM +, Dmitry Fomichev wrote:
> > On Wed, 2020-09-30 at 14:50 +, Niklas Cassel wrote:
> > > On Mon, Sep 28, 2020 at 11:35:23AM +0900, Dmitry Fomichev wrote:
> > > > The emulation code has been changed to advertise NVM Command Set
> when
> > > > "zoned" device property is not set (default) and Zoned Namespace
> > > > Command Set otherwise.
> > > >
> > > > Handlers for three new NVMe commands introduced in Zoned
> Namespace
> > > > Command Set specification are added, namely for Zone Management
> > > > Receive, Zone Management Send and Zone Append.
> > > >
> > > > Device initialization code has been extended to create a proper
> > > > configuration for zoned operation using device properties.
> > > >
> > > > Read/Write command handler is modified to only allow writes at the
> > > > write pointer if the namespace is zoned. For Zone Append command,
> > > > writes implicitly happen at the write pointer and the starting write
> > > > pointer value is returned as the result of the command. Write Zeroes
> > > > handler is modified to add zoned checks that are identical to those
> > > > done as a part of Write flow.
> > > >
> > > > The code to support for Zone Descriptor Extensions is not included in
> > > > this commit and ZDES 0 is always reported. A later commit in this
> > > > series will add ZDE support.
> > > >
> > > > This commit doesn't yet include checks for active and open zone
> > > > limits. It is assumed that there are no limits on either active or
> > > > open zones.
> > > >
> > > > Signed-off-by: Niklas Cassel 
> > > > Signed-off-by: Hans Holmberg 
> > > > Signed-off-by: Ajay Joshi 
> > > > Signed-off-by: Chaitanya Kulkarni 
> > > > Signed-off-by: Matias Bjorling 
> > > > Signed-off-by: Aravind Ramesh 
> > > > Signed-off-by: Shin'ichiro Kawasaki 
> > > > Signed-off-by: Adam Manzanares 
> > > > Signed-off-by: Dmitry Fomichev 
> > > > ---
> > > >  block/nvme.c |   2 +-
> > > >  hw/block/nvme-ns.c   | 185 -
> > > >  hw/block/nvme-ns.h   |   6 +-
> > > >  hw/block/nvme.c  | 872
> +--
> > > >  include/block/nvme.h |   6 +-
> > > >  5 files changed, 1033 insertions(+), 38 deletions(-)
> > > >
> > > > diff --git a/block/nvme.c b/block/nvme.c
> > > > index 05485fdd11..7a513c9a17 100644
> > > > --- a/block/nvme.c
> > > > +++ b/block/nvme.c
> 
> (snip)
> 
> > >
> > > Please read my comment on nvme_identify_nslist_csi() before reading
> > > this comment.
> > >
> > > At least for this function, the specification is clear:
> > >
> > > "If the host requests a data structure for an I/O Command Set that the
> > > controller does not support, the controller shall abort the command with
> > > a status of Invalid Field in Command."
> > >
> > > If the controller supports the I/O command set == if the Command Set bit
> > > is set in the data struct returned by the nvme_identify_cmd_set(),
> > > so here we should do something like:
> > >
> > > } else if (->csi == NVME_CSI_ZONED && ctrl_has_zns_namespaces()) {
> > >   ...
> > > }
> > >
> >
> > With this commit, the controller supports ZNS command set regardless of
> > the number of attached ZNS namespaces. It could be zero, but the
> controller
> > still supports it. I think it would be better not to change the behavior
> > of this command to depend on whether there are any ZNS namespaces
> added
> > or not.
> 
> Ok, always having ZNS Command Set support, regardless if a user defines
> a zoned namespace on the QEMU command line or not, does simplify things.
> 
> But then in nvme_identify_cmd_set(), you need to call
> NVME_SET_CSI(*list, NVME_CSI_ZONED) unconditionally.
> 

Perhaps,
NVME_SET_CSI(*list, NVME_CSI_NVM)
NVME_SET_CSI(*list, NVME_CSI_ZONED)

since this is a vector...

> (Right now you loop though all namespaces, and only set the support bit
> if you find a zoned namespace.)
> 
> > > Like I wrote in my review comment in the patch that added support for
> the new
> > > allocated CNS values, I prefer if we remove this for-loop completely, and
> > > simply set attached = true in nvme_ns_setup()/nvme_ns_init() instead.
> > >
> > > (I was considering if we should set attach = true in
> nvme_zoned_init_ns(),
> > > but because nvme_ns_setup()/nvme_ns_init() is called for all
> namespaces,
> > > including ZNS namespaces, I don't think that any additional code in
> > > nvme_zoned_init_ns() is warranted.)
> >
> > I think CC.CSS value is not available during namespace setup and if we
> > assign active flag in nvme_zoned_ns_setup(), zoned namespaces 

Re: [PATCH v10 5/9] copy-on-read: limit guest COR activity to base in COR driver

2020-10-05 Thread Andrey Shinkevich

On 05.10.2020 17:58, Vladimir Sementsov-Ogievskiy wrote:

29.09.2020 15:38, Andrey Shinkevich wrote:

Limit the guest's COR operations by the base node in the backing chain
when the base node name is given. It will be useful for a block stream
job when the COR-filter is applied.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 38 --
  1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index e04092f..f53f7e0 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -121,8 +121,42 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,

 size_t qiov_offset,
 int flags)
  {
-    return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, 
qiov_offset,

-   flags | BDRV_REQ_COPY_ON_READ);
+    int64_t n = 0;
+    int64_t size = offset + bytes;
+    int local_flags;
+    int ret;
+    BDRVStateCOR *state = bs->opaque;
+
+    if (!state->base_bs) {
+    return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, 
qiov_offset,

+   flags | BDRV_REQ_COPY_ON_READ);
+    }
+
+    while (offset < size) {
+    local_flags = flags;
+
+    /* In case of failure, try to copy-on-read anyway */


But you add the flag only in case of success.. On any failure of furhter 
is*allocated calls we should set the flag.




Actually, myself would prefer returning the error instead.

Andrey


+    ret = bdrv_is_allocated(bs->file->bs, offset, bytes, );
+    if (!ret) {
+    ret = bdrv_is_allocated_above(bdrv_cow_bs(bs->file->bs),
+  state->base_bs, false, 
offset, n, );

+    if (ret > 0) {
+    local_flags |= BDRV_REQ_COPY_ON_READ;
+    }
+    }
+
+    ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, 
qiov_offset,

+  local_flags);
+    if (ret < 0) {
+    return ret;
+    }
+
+    offset += n;
+    qiov_offset += n;
+    bytes -= n;
+    }
+
+    return 0;
  }








Re: [PATCH v10 5/9] copy-on-read: limit guest COR activity to base in COR driver

2020-10-05 Thread Andrey Shinkevich




On 05.10.2020 17:58, Vladimir Sementsov-Ogievskiy wrote:

29.09.2020 15:38, Andrey Shinkevich wrote:

Limit the guest's COR operations by the base node in the backing chain
when the base node name is given. It will be useful for a block stream
job when the COR-filter is applied.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 38 --
  1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index e04092f..f53f7e0 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -121,8 +121,42 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,

 size_t qiov_offset,
 int flags)
  {
-    return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, 
qiov_offset,

-   flags | BDRV_REQ_COPY_ON_READ);
+    int64_t n = 0;
+    int64_t size = offset + bytes;
+    int local_flags;
+    int ret;
+    BDRVStateCOR *state = bs->opaque;
+
+    if (!state->base_bs) {
+    return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, 
qiov_offset,

+   flags | BDRV_REQ_COPY_ON_READ);
+    }
+
+    while (offset < size) {
+    local_flags = flags;
+
+    /* In case of failure, try to copy-on-read anyway */


But you add the flag only in case of success.. On any failure of furhter 
is*allocated calls we should set the flag.


Yes, thanks.
Andrey




+    ret = bdrv_is_allocated(bs->file->bs, offset, bytes, );
+    if (!ret) {
+    ret = bdrv_is_allocated_above(bdrv_cow_bs(bs->file->bs),
+  state->base_bs, false, 
offset, n, );

+    if (ret > 0) {
+    local_flags |= BDRV_REQ_COPY_ON_READ;
+    }
+    }
+
+    ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, 
qiov_offset,

+  local_flags);
+    if (ret < 0) {
+    return ret;
+    }
+
+    offset += n;
+    qiov_offset += n;
+    bytes -= n;
+    }
+
+    return 0;
  }








Re: [PATCH v8 02/14] monitor: Add Monitor parameter to monitor_get_cpu_index()

2020-10-05 Thread Eric Blake
On 10/5/20 10:58 AM, Kevin Wolf wrote:
> Most callers actually don't have to rely on cur_mon, but already know
> for which monitor they call monitor_get_cpu_index().
> 
> Signed-off-by: Kevin Wolf 
> ---
>  include/monitor/monitor.h  |  2 +-
>  hw/core/machine-hmp-cmds.c |  2 +-
>  monitor/hmp-cmds.c |  2 +-
>  monitor/misc.c | 20 ++--
>  softmmu/cpus.c |  2 +-
>  5 files changed, 14 insertions(+), 14 deletions(-)
> 
Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


[PATCH v8 04/14] hmp: Update current monitor only in handle_hmp_command()

2020-10-05 Thread Kevin Wolf
The current monitor is updated relatively early in the command handling
code even though only the command handler actually needs it.

The current monitor will become coroutine-local later, so we can only
update it when we know in which coroutine the command will be exectued.
Move it to handle_hmp_command() where this information will be
available.

Signed-off-by: Kevin Wolf 
Reviewed-by: Eric Blake 
---
 monitor/hmp.c  | 10 +-
 monitor/misc.c |  5 -
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/monitor/hmp.c b/monitor/hmp.c
index e0cc9e65dd..560ec98e7b 100644
--- a/monitor/hmp.c
+++ b/monitor/hmp.c
@@ -1061,6 +1061,7 @@ void handle_hmp_command(MonitorHMP *mon, const char 
*cmdline)
 QDict *qdict;
 const HMPCommand *cmd;
 const char *cmd_start = cmdline;
+Monitor *old_mon;
 
 trace_handle_hmp_command(mon, cmdline);
 
@@ -1079,7 +1080,11 @@ void handle_hmp_command(MonitorHMP *mon, const char 
*cmdline)
 return;
 }
 
+/* old_mon is non-NULL when called from qmp_human_monitor_command() */
+old_mon = monitor_set_cur(>common);
 cmd->cmd(>common, qdict);
+monitor_set_cur(old_mon);
+
 qobject_unref(qdict);
 }
 
@@ -1301,11 +1306,8 @@ cleanup:
 static void monitor_read(void *opaque, const uint8_t *buf, int size)
 {
 MonitorHMP *mon = container_of(opaque, MonitorHMP, common);
-Monitor *old_mon;
 int i;
 
-old_mon = monitor_set_cur(>common);
-
 if (mon->rs) {
 for (i = 0; i < size; i++) {
 readline_handle_byte(mon->rs, buf[i]);
@@ -1317,8 +1319,6 @@ static void monitor_read(void *opaque, const uint8_t 
*buf, int size)
 handle_hmp_command(mon, (char *)buf);
 }
 }
-
-monitor_set_cur(old_mon);
 }
 
 static void monitor_event(void *opaque, QEMUChrEvent event)
diff --git a/monitor/misc.c b/monitor/misc.c
index ee8db45094..4a859fb24a 100644
--- a/monitor/misc.c
+++ b/monitor/misc.c
@@ -120,17 +120,13 @@ char *qmp_human_monitor_command(const char *command_line, 
bool has_cpu_index,
 int64_t cpu_index, Error **errp)
 {
 char *output = NULL;
-Monitor *old_mon;
 MonitorHMP hmp = {};
 
 monitor_data_init(, false, true, false);
 
-old_mon = monitor_set_cur();
-
 if (has_cpu_index) {
 int ret = monitor_set_cpu(, cpu_index);
 if (ret < 0) {
-monitor_set_cur(old_mon);
 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
"a CPU number");
 goto out;
@@ -138,7 +134,6 @@ char *qmp_human_monitor_command(const char *command_line, 
bool has_cpu_index,
 }
 
 handle_hmp_command(, command_line);
-monitor_set_cur(old_mon);
 
 WITH_QEMU_LOCK_GUARD(_lock) {
 if (qstring_get_length(hmp.common.outbuf) > 0) {
-- 
2.25.4




Re: [PATCH v10 2/9] copy-on-read: add filter append/drop functions

2020-10-05 Thread Andrey Shinkevich

On 05.10.2020 16:34, Vladimir Sementsov-Ogievskiy wrote:

29.09.2020 15:38, Andrey Shinkevich wrote:

Provide API for the COR-filter insertion/removal.
Also, drop the filter child permissions for an inactive state when the
filter node is being removed.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 84 


  block/copy-on-read.h | 35 ++
  2 files changed, 119 insertions(+)
  create mode 100644 block/copy-on-read.h

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..3c8231f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -23,11 +23,21 @@
  #include "qemu/osdep.h"
  #include "block/block_int.h"
  #include "qemu/module.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "block/copy-on-read.h"
+
+
+typedef struct BDRVStateCOR {
+    bool active;
+} BDRVStateCOR;
  static int cor_open(BlockDriverState *bs, QDict *options, int flags,
  Error **errp)
  {
+    BDRVStateCOR *state = bs->opaque;
+
  bs->file = bdrv_open_child(NULL, options, "file", bs, 
_of_bds,
 BDRV_CHILD_FILTERED | 
BDRV_CHILD_PRIMARY,

 false, errp);
@@ -42,6 +52,13 @@ static int cor_open(BlockDriverState *bs, QDict 
*options, int flags,

  ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
  bs->file->bs->supported_zero_flags);
+    state->active = true;
+
+    /*
+ * We don't need to call bdrv_child_refresh_perms() now as the 
permissions

+ * will be updated later when the filter node gets its parent.
+ */
+
  return 0;
  }
@@ -57,6 +74,17 @@ static void cor_child_perm(BlockDriverState *bs, 
BdrvChild *c,

 uint64_t perm, uint64_t shared,
 uint64_t *nperm, uint64_t *nshared)
  {
+    BDRVStateCOR *s = bs->opaque;
+
+    if (!s->active) {
+    /*
+ * While the filter is being removed
+ */
+    *nperm = 0;
+    *nshared = BLK_PERM_ALL;
+    return;
+    }
+
  *nperm = perm & PERM_PASSTHROUGH;
  *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
@@ -135,6 +163,7 @@ static void cor_lock_medium(BlockDriverState *bs, 
bool locked)

  static BlockDriver bdrv_copy_on_read = {
  .format_name    = "copy-on-read",
+    .instance_size  = sizeof(BDRVStateCOR),
  .bdrv_open  = cor_open,
  .bdrv_child_perm    = cor_child_perm,
@@ -159,4 +188,59 @@ static void bdrv_copy_on_read_init(void)
  bdrv_register(_copy_on_read);
  }
+
+BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
+ QDict *node_options,
+ int flags, Error **errp)



Ok, now function can add ~any filter, not only COR.. But it's a pair for 
bdrv_cor_filter_drop(), and with "active" hack we don't want make the 
functions generic I think. So it's OK for now to keep function here and 
named _cor_.



+{
+    BlockDriverState *cor_filter_bs;
+    Error *local_err = NULL;
+
+    cor_filter_bs = bdrv_open(NULL, NULL, node_options, flags, errp);
+    if (cor_filter_bs == NULL) {
+    error_prepend(errp, "Could not create COR-filter node: ");
+    return NULL;
+    }


You've dropped setting ->implicit field if filter_node_name not 
specified. Probably caller now can do it.. I don't really care about 
implicit case, so it's OK for me if it works with iotests.


Thank you for your R-B. The idea behind setting the 'implicit' member by 
a caller is to prepare the code for the node replacement by a function 
at the block generic layer in future. In the scope of this series, that 
may be better to keep it here.


Andrey



So,

Reviewed-by: Vladimir Sementsov-Ogievskiy 






[PATCH v8 14/14] block: Convert 'block_resize' to coroutine

2020-10-05 Thread Kevin Wolf
block_resize performs some I/O that could potentially take quite some
time, so use it as an example for the new 'coroutine': true annotation
in the QAPI schema.

bdrv_truncate() requires that we're already in the right AioContext for
the BlockDriverState if called in coroutine context. So instead of just
taking the AioContext lock, move the QMP handler coroutine to the
context.

Call blk_unref() only after switching back because blk_unref() may only
be called in the main thread.

Signed-off-by: Kevin Wolf 
---
 qapi/block-core.json |  3 ++-
 blockdev.c   | 16 
 hmp-commands.hx  |  1 +
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 12a24772b5..90383b0148 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1304,7 +1304,8 @@
 { 'command': 'block_resize',
   'data': { '*device': 'str',
 '*node-name': 'str',
-'size': 'int' } }
+'size': 'int' },
+  'coroutine': true }
 
 ##
 # @NewImageMode:
diff --git a/blockdev.c b/blockdev.c
index bebd3ba1c3..e331f81034 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2439,14 +2439,14 @@ BlockDirtyBitmapSha256 
*qmp_x_debug_block_dirty_bitmap_sha256(const char *node,
 return ret;
 }
 
-void qmp_block_resize(bool has_device, const char *device,
-  bool has_node_name, const char *node_name,
-  int64_t size, Error **errp)
+void coroutine_fn qmp_block_resize(bool has_device, const char *device,
+   bool has_node_name, const char *node_name,
+   int64_t size, Error **errp)
 {
 Error *local_err = NULL;
 BlockBackend *blk = NULL;
 BlockDriverState *bs;
-AioContext *aio_context;
+AioContext *old_ctx;
 
 bs = bdrv_lookup_bs(has_device ? device : NULL,
 has_node_name ? node_name : NULL,
@@ -2456,9 +2456,6 @@ void qmp_block_resize(bool has_device, const char *device,
 return;
 }
 
-aio_context = bdrv_get_aio_context(bs);
-aio_context_acquire(aio_context);
-
 if (size < 0) {
 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "size", "a >0 size");
 goto out;
@@ -2475,12 +2472,15 @@ void qmp_block_resize(bool has_device, const char 
*device,
 }
 
 bdrv_drained_begin(bs);
+old_ctx = bdrv_co_enter(bs);
 blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
+bdrv_co_leave(bs, old_ctx);
 bdrv_drained_end(bs);
 
 out:
+bdrv_co_lock(bs);
 blk_unref(blk);
-aio_context_release(aio_context);
+bdrv_co_unlock(bs);
 }
 
 void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 1088d64503..de83a5cc61 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -76,6 +76,7 @@ ERST
 .params = "device size",
 .help   = "resize a block image",
 .cmd= hmp_block_resize,
+.coroutine  = true,
 },
 
 SRST
-- 
2.25.4




[PATCH v8 12/14] block: Add bdrv_co_enter()/leave()

2020-10-05 Thread Kevin Wolf
Add a pair of functions to temporarily move the current coroutine to the
AioContext of a given BlockDriverState.

Signed-off-by: Kevin Wolf 
---
 include/block/block.h | 17 +
 block.c   | 23 +++
 2 files changed, 40 insertions(+)

diff --git a/include/block/block.h b/include/block/block.h
index 981ab5b314..b5fa7b2229 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -626,6 +626,23 @@ bool bdrv_debug_is_suspended(BlockDriverState *bs, const 
char *tag);
  */
 AioContext *bdrv_get_aio_context(BlockDriverState *bs);
 
+/**
+ * Move the current coroutine to the AioContext of @bs and return the old
+ * AioContext of the coroutine. Increase bs->in_flight so that draining @bs
+ * will wait for the operation to proceed until the corresponding
+ * bdrv_co_leave().
+ *
+ * Consequently, you can't call drain inside a bdrv_co_enter/leave() section as
+ * this will deadlock.
+ */
+AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs);
+
+/**
+ * Ends a section started by bdrv_co_enter(). Move the current coroutine back
+ * to old_ctx and decrease bs->in_flight again.
+ */
+void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx);
+
 /**
  * Transfer control to @co in the aio context of @bs
  */
diff --git a/block.c b/block.c
index f4b6dd5d3d..5eac2683b2 100644
--- a/block.c
+++ b/block.c
@@ -6372,6 +6372,29 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs)
 return bs ? bs->aio_context : qemu_get_aio_context();
 }
 
+AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs)
+{
+Coroutine *self = qemu_coroutine_self();
+AioContext *old_ctx = qemu_coroutine_get_aio_context(self);
+AioContext *new_ctx;
+
+/*
+ * Increase bs->in_flight to ensure that this operation is completed before
+ * moving the node to a different AioContext. Read new_ctx only afterwards.
+ */
+bdrv_inc_in_flight(bs);
+
+new_ctx = bdrv_get_aio_context(bs);
+aio_co_reschedule_self(new_ctx);
+return old_ctx;
+}
+
+void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
+{
+aio_co_reschedule_self(old_ctx);
+bdrv_dec_in_flight(bs);
+}
+
 void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
 {
 aio_co_enter(bdrv_get_aio_context(bs), co);
-- 
2.25.4




[PATCH v8 13/14] block: Add bdrv_lock()/unlock()

2020-10-05 Thread Kevin Wolf
Inside of coroutine context, we can't directly use aio_context_acquire()
for the AioContext of a block node because we already own the lock of
the current AioContext and we need to avoid double locking to prevent
deadlocks.

This provides helper functions to lock the AioContext of a node only if
it's not the same as the current AioContext.

Signed-off-by: Kevin Wolf 
---
 include/block/block.h | 14 ++
 block.c   | 27 +++
 2 files changed, 41 insertions(+)

diff --git a/include/block/block.h b/include/block/block.h
index b5fa7b2229..65d79270e6 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -643,6 +643,20 @@ AioContext *coroutine_fn bdrv_co_enter(BlockDriverState 
*bs);
  */
 void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx);
 
+/**
+ * Locks the AioContext of @bs if it's not the current AioContext. This avoids
+ * double locking which could lead to deadlocks: This is a coroutine_fn, so we
+ * know we already own the lock of the current AioContext.
+ *
+ * May only be called in the main thread.
+ */
+void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
+
+/**
+ * Unlocks the AioContext of @bs if it's not the current AioContext.
+ */
+void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
+
 /**
  * Transfer control to @co in the aio context of @bs
  */
diff --git a/block.c b/block.c
index 5eac2683b2..e39d5fa3db 100644
--- a/block.c
+++ b/block.c
@@ -6395,6 +6395,33 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, 
AioContext *old_ctx)
 bdrv_dec_in_flight(bs);
 }
 
+void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
+{
+AioContext *ctx = bdrv_get_aio_context(bs);
+
+/* In the main thread, bs->aio_context won't change concurrently */
+assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+
+/*
+ * We're in coroutine context, so we already hold the lock of the main
+ * loop AioContext. Don't lock it twice to avoid deadlocks.
+ */
+assert(qemu_in_coroutine());
+if (ctx != qemu_get_aio_context()) {
+aio_context_acquire(ctx);
+}
+}
+
+void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
+{
+AioContext *ctx = bdrv_get_aio_context(bs);
+
+assert(qemu_in_coroutine());
+if (ctx != qemu_get_aio_context()) {
+aio_context_release(ctx);
+}
+}
+
 void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
 {
 aio_co_enter(bdrv_get_aio_context(bs), co);
-- 
2.25.4




[PATCH v8 10/14] hmp: Add support for coroutine command handlers

2020-10-05 Thread Kevin Wolf
Often, QMP command handlers are not only called to handle QMP commands,
but also from a corresponding HMP command handler. In order to give them
a consistent environment, optionally run HMP command handlers in a
coroutine, too.

The implementation is a lot simpler than in QMP because for HMP, we
still block the VM while the coroutine is running.

Signed-off-by: Kevin Wolf 
Reviewed-by: Dr. David Alan Gilbert 
---
 docs/devel/qapi-code-gen.txt |  4 ++--
 monitor/monitor-internal.h   |  1 +
 monitor/hmp.c| 37 +++-
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/docs/devel/qapi-code-gen.txt b/docs/devel/qapi-code-gen.txt
index 4a41e36a75..c6438c6aa9 100644
--- a/docs/devel/qapi-code-gen.txt
+++ b/docs/devel/qapi-code-gen.txt
@@ -617,8 +617,8 @@ pitfalls are:
 
 Since the command handler may assume coroutine context, any callers
 other than the QMP dispatcher must also call it in coroutine context.
-In particular, HMP commands calling such a QMP command handler must
-enter coroutine context before calling the handler.
+In particular, HMP commands calling such a QMP command handler must be
+marked .coroutine = true in hmp-commands.hx.
 
 It is an error to specify both 'coroutine': true and 'allow-oob': true
 for a command.  We don't currently have a use case for both together and
diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h
index b55d6df07f..ad2e64be13 100644
--- a/monitor/monitor-internal.h
+++ b/monitor/monitor-internal.h
@@ -74,6 +74,7 @@ typedef struct HMPCommand {
 const char *help;
 const char *flags; /* p=preconfig */
 void (*cmd)(Monitor *mon, const QDict *qdict);
+bool coroutine;
 /*
  * @sub_table is a list of 2nd level of commands. If it does not exist,
  * cmd should be used. If it exists, sub_table[?].cmd should be
diff --git a/monitor/hmp.c b/monitor/hmp.c
index abaf939b2d..c5cd9d372b 100644
--- a/monitor/hmp.c
+++ b/monitor/hmp.c
@@ -1056,12 +1056,26 @@ fail:
 return NULL;
 }
 
+typedef struct HandleHmpCommandCo {
+Monitor *mon;
+const HMPCommand *cmd;
+QDict *qdict;
+bool done;
+} HandleHmpCommandCo;
+
+static void handle_hmp_command_co(void *opaque)
+{
+HandleHmpCommandCo *data = opaque;
+data->cmd->cmd(data->mon, data->qdict);
+monitor_set_cur(qemu_coroutine_self(), NULL);
+data->done = true;
+}
+
 void handle_hmp_command(MonitorHMP *mon, const char *cmdline)
 {
 QDict *qdict;
 const HMPCommand *cmd;
 const char *cmd_start = cmdline;
-Monitor *old_mon;
 
 trace_handle_hmp_command(mon, cmdline);
 
@@ -1080,10 +1094,23 @@ void handle_hmp_command(MonitorHMP *mon, const char 
*cmdline)
 return;
 }
 
-/* old_mon is non-NULL when called from qmp_human_monitor_command() */
-old_mon = monitor_set_cur(qemu_coroutine_self(), >common);
-cmd->cmd(>common, qdict);
-monitor_set_cur(qemu_coroutine_self(), old_mon);
+if (!cmd->coroutine) {
+/* old_mon is non-NULL when called from qmp_human_monitor_command() */
+Monitor *old_mon = monitor_set_cur(qemu_coroutine_self(), 
>common);
+cmd->cmd(>common, qdict);
+monitor_set_cur(qemu_coroutine_self(), old_mon);
+} else {
+HandleHmpCommandCo data = {
+.mon = >common,
+.cmd = cmd,
+.qdict = qdict,
+.done = false,
+};
+Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, );
+monitor_set_cur(co, >common);
+aio_co_enter(qemu_get_aio_context(), co);
+AIO_WAIT_WHILE(qemu_get_aio_context(), !data.done);
+}
 
 qobject_unref(qdict);
 }
-- 
2.25.4




[PATCH v8 08/14] qapi: Add a 'coroutine' flag for commands

2020-10-05 Thread Kevin Wolf
This patch adds a new 'coroutine' flag to QMP command definitions that
tells the QMP dispatcher that the command handler is safe to be run in a
coroutine.

The documentation of the new flag pretends that this flag is already
used as intended, which it isn't yet after this patch. We'll implement
this in another patch in this series.

Signed-off-by: Kevin Wolf 
---
 docs/devel/qapi-code-gen.txt| 29 +
 docs/sphinx/qapidoc.py  |  2 +-
 include/qapi/qmp/dispatch.h |  1 +
 tests/test-qmp-cmds.c   |  4 
 scripts/qapi/commands.py| 10 ++---
 scripts/qapi/expr.py| 11 --
 scripts/qapi/introspect.py  |  2 +-
 scripts/qapi/schema.py  | 13 +++
 tests/qapi-schema/test-qapi.py  |  7 +++---
 tests/qapi-schema/meson.build   |  1 +
 tests/qapi-schema/oob-coroutine.err |  2 ++
 tests/qapi-schema/oob-coroutine.json|  2 ++
 tests/qapi-schema/oob-coroutine.out |  0
 tests/qapi-schema/qapi-schema-test.json |  1 +
 tests/qapi-schema/qapi-schema-test.out  |  2 ++
 15 files changed, 73 insertions(+), 14 deletions(-)
 create mode 100644 tests/qapi-schema/oob-coroutine.err
 create mode 100644 tests/qapi-schema/oob-coroutine.json
 create mode 100644 tests/qapi-schema/oob-coroutine.out

diff --git a/docs/devel/qapi-code-gen.txt b/docs/devel/qapi-code-gen.txt
index 5fc67c99cd..4a41e36a75 100644
--- a/docs/devel/qapi-code-gen.txt
+++ b/docs/devel/qapi-code-gen.txt
@@ -472,6 +472,7 @@ Syntax:
 '*gen': false,
 '*allow-oob': true,
 '*allow-preconfig': true,
+'*coroutine': true,
 '*if': COND,
 '*features': FEATURES }
 
@@ -596,6 +597,34 @@ before the machine is built.  It defaults to false.  For 
example:
 QMP is available before the machine is built only when QEMU was
 started with --preconfig.
 
+Member 'coroutine' tells the QMP dispatcher whether the command handler
+is safe to be run in a coroutine.  It defaults to false.  If it is true,
+the command handler is called from coroutine context and may yield while
+waiting for an external event (such as I/O completion) in order to avoid
+blocking the guest and other background operations.
+
+Coroutine safety can be hard to prove, similar to thread safety.  Common
+pitfalls are:
+
+- The global mutex isn't held across qemu_coroutine_yield(), so
+  operations that used to assume that they execute atomically may have
+  to be more careful to protect against changes in the global state.
+
+- Nested event loops (AIO_WAIT_WHILE() etc.) are problematic in
+  coroutine context and can easily lead to deadlocks.  They should be
+  replaced by yielding and reentering the coroutine when the condition
+  becomes false.
+
+Since the command handler may assume coroutine context, any callers
+other than the QMP dispatcher must also call it in coroutine context.
+In particular, HMP commands calling such a QMP command handler must
+enter coroutine context before calling the handler.
+
+It is an error to specify both 'coroutine': true and 'allow-oob': true
+for a command.  We don't currently have a use case for both together and
+without a use case, it's not entirely clear what the semantics should
+be.
+
 The optional 'if' member specifies a conditional.  See "Configuring
 the schema" below for more on this.
 
diff --git a/docs/sphinx/qapidoc.py b/docs/sphinx/qapidoc.py
index 6944ffa6aa..e03abcbb95 100644
--- a/docs/sphinx/qapidoc.py
+++ b/docs/sphinx/qapidoc.py
@@ -330,7 +330,7 @@ class QAPISchemaGenRSTVisitor(QAPISchemaVisitor):
 
 def visit_command(self, name, info, ifcond, features, arg_type,
   ret_type, gen, success_response, boxed, allow_oob,
-  allow_preconfig):
+  allow_preconfig, coroutine):
 doc = self._cur_doc
 self._add_doc('Command',
   self._nodes_for_arguments(doc,
diff --git a/include/qapi/qmp/dispatch.h b/include/qapi/qmp/dispatch.h
index 0c2f467028..9fd2b720a7 100644
--- a/include/qapi/qmp/dispatch.h
+++ b/include/qapi/qmp/dispatch.h
@@ -25,6 +25,7 @@ typedef enum QmpCommandOptions
 QCO_NO_SUCCESS_RESP   =  (1U << 0),
 QCO_ALLOW_OOB =  (1U << 1),
 QCO_ALLOW_PRECONFIG   =  (1U << 2),
+QCO_COROUTINE =  (1U << 3),
 } QmpCommandOptions;
 
 typedef struct QmpCommand
diff --git a/tests/test-qmp-cmds.c b/tests/test-qmp-cmds.c
index 5f1b245e19..d3413bfef0 100644
--- a/tests/test-qmp-cmds.c
+++ b/tests/test-qmp-cmds.c
@@ -36,6 +36,10 @@ void qmp_cmd_success_response(Error **errp)
 {
 }
 
+void qmp_coroutine_cmd(Error **errp)
+{
+}
+
 Empty2 *qmp_user_def_cmd0(Error **errp)
 {
 return g_new0(Empty2, 1);
diff --git a/scripts/qapi/commands.py b/scripts/qapi/commands.py
index 3cf9e1110b..6e6fc94a14 100644
--- a/scripts/qapi/commands.py
+++ b/scripts/qapi/commands.py

[PATCH v8 11/14] util/async: Add aio_co_reschedule_self()

2020-10-05 Thread Kevin Wolf
Add a function that can be used to move the currently running coroutine
to a different AioContext (and therefore potentially a different
thread).

Signed-off-by: Kevin Wolf 
Reviewed-by: Stefan Hajnoczi 
---
 include/block/aio.h | 10 ++
 util/async.c| 30 ++
 2 files changed, 40 insertions(+)

diff --git a/include/block/aio.h b/include/block/aio.h
index ec8c5af642..5f342267d5 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -17,6 +17,7 @@
 #ifdef CONFIG_LINUX_IO_URING
 #include 
 #endif
+#include "qemu/coroutine.h"
 #include "qemu/queue.h"
 #include "qemu/event_notifier.h"
 #include "qemu/thread.h"
@@ -654,6 +655,15 @@ static inline bool aio_node_check(AioContext *ctx, bool 
is_external)
  */
 void aio_co_schedule(AioContext *ctx, struct Coroutine *co);
 
+/**
+ * aio_co_reschedule_self:
+ * @new_ctx: the new context
+ *
+ * Move the currently running coroutine to new_ctx. If the coroutine is already
+ * running in new_ctx, do nothing.
+ */
+void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx);
+
 /**
  * aio_co_wake:
  * @co: the coroutine
diff --git a/util/async.c b/util/async.c
index f758354c6a..674dbefb7c 100644
--- a/util/async.c
+++ b/util/async.c
@@ -569,6 +569,36 @@ void aio_co_schedule(AioContext *ctx, Coroutine *co)
 aio_context_unref(ctx);
 }
 
+typedef struct AioCoRescheduleSelf {
+Coroutine *co;
+AioContext *new_ctx;
+} AioCoRescheduleSelf;
+
+static void aio_co_reschedule_self_bh(void *opaque)
+{
+AioCoRescheduleSelf *data = opaque;
+aio_co_schedule(data->new_ctx, data->co);
+}
+
+void coroutine_fn aio_co_reschedule_self(AioContext *new_ctx)
+{
+AioContext *old_ctx = qemu_get_current_aio_context();
+
+if (old_ctx != new_ctx) {
+AioCoRescheduleSelf data = {
+.co = qemu_coroutine_self(),
+.new_ctx = new_ctx,
+};
+/*
+ * We can't directly schedule the coroutine in the target context
+ * because this would be racy: The other thread could try to enter the
+ * coroutine before it has yielded in this one.
+ */
+aio_bh_schedule_oneshot(old_ctx, aio_co_reschedule_self_bh, );
+qemu_coroutine_yield();
+}
+}
+
 void aio_co_wake(struct Coroutine *co)
 {
 AioContext *ctx;
-- 
2.25.4




[PATCH v8 07/14] monitor: Make current monitor a per-coroutine property

2020-10-05 Thread Kevin Wolf
This way, a monitor command handler will still be able to access the
current monitor, but when it yields, all other code code will correctly
get NULL from monitor_cur().

This uses a hash table to map the coroutine pointer to the current
monitor of that coroutine.  Outside of coroutine context, we associate
the current monitor with the leader coroutine of the current thread.

Approaches to implement some form of coroutine local storage directly in
the coroutine core code have been considered and discarded because they
didn't end up being much more generic than the hash table and their
performance impact on coroutines not using coroutine local storage was
unclear. As the block layer uses a coroutine per I/O request, this is a
fast path and we have to be careful. It's safest to just stay out of
this path with code only used by the monitor.

Signed-off-by: Kevin Wolf 
Reviewed-by: Eric Blake 
---
 include/monitor/monitor.h |  2 +-
 monitor/hmp.c |  4 ++--
 monitor/monitor.c | 34 +++---
 qapi/qmp-dispatch.c   |  4 ++--
 stubs/monitor-core.c  |  2 +-
 5 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index 543eafcb76..348bfad3d5 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -13,7 +13,7 @@ typedef struct MonitorOptions MonitorOptions;
 extern QemuOptsList qemu_mon_opts;
 
 Monitor *monitor_cur(void);
-Monitor *monitor_set_cur(Monitor *mon);
+Monitor *monitor_set_cur(Coroutine *co, Monitor *mon);
 bool monitor_cur_is_qmp(void);
 
 void monitor_init_globals(void);
diff --git a/monitor/hmp.c b/monitor/hmp.c
index 560ec98e7b..abaf939b2d 100644
--- a/monitor/hmp.c
+++ b/monitor/hmp.c
@@ -1081,9 +1081,9 @@ void handle_hmp_command(MonitorHMP *mon, const char 
*cmdline)
 }
 
 /* old_mon is non-NULL when called from qmp_human_monitor_command() */
-old_mon = monitor_set_cur(>common);
+old_mon = monitor_set_cur(qemu_coroutine_self(), >common);
 cmd->cmd(>common, qdict);
-monitor_set_cur(old_mon);
+monitor_set_cur(qemu_coroutine_self(), old_mon);
 
 qobject_unref(qdict);
 }
diff --git a/monitor/monitor.c b/monitor/monitor.c
index 099c164c6d..ef68ca9d21 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -58,29 +58,48 @@ IOThread *mon_iothread;
 /* Bottom half to dispatch the requests received from I/O thread */
 QEMUBH *qmp_dispatcher_bh;
 
-/* Protects mon_list, monitor_qapi_event_state, monitor_destroyed.  */
+/*
+ * Protects mon_list, monitor_qapi_event_state, coroutine_mon,
+ * monitor_destroyed.
+ */
 QemuMutex monitor_lock;
 static GHashTable *monitor_qapi_event_state;
+static GHashTable *coroutine_mon; /* Maps Coroutine* to Monitor* */
 
 MonitorList mon_list;
 int mon_refcount;
 static bool monitor_destroyed;
 
-static __thread Monitor *cur_monitor;
-
 Monitor *monitor_cur(void)
 {
-return cur_monitor;
+Monitor *mon;
+
+qemu_mutex_lock(_lock);
+mon = g_hash_table_lookup(coroutine_mon, qemu_coroutine_self());
+qemu_mutex_unlock(_lock);
+
+return mon;
 }
 
 /**
  * Sets a new current monitor and returns the old one.
+ *
+ * If a non-NULL monitor is set for a coroutine, another call
+ * resetting it to NULL is required before the coroutine terminates,
+ * otherwise a stale entry would remain in the hash table.
  */
-Monitor *monitor_set_cur(Monitor *mon)
+Monitor *monitor_set_cur(Coroutine *co, Monitor *mon)
 {
-Monitor *old_monitor = cur_monitor;
+Monitor *old_monitor = monitor_cur();
+
+qemu_mutex_lock(_lock);
+if (mon) {
+g_hash_table_replace(coroutine_mon, co, mon);
+} else {
+g_hash_table_remove(coroutine_mon, co);
+}
+qemu_mutex_unlock(_lock);
 
-cur_monitor = mon;
 return old_monitor;
 }
 
@@ -623,6 +642,7 @@ void monitor_init_globals_core(void)
 {
 monitor_qapi_event_init();
 qemu_mutex_init(_lock);
+coroutine_mon = g_hash_table_new(NULL, NULL);
 
 /*
  * The dispatcher BH must run in the main loop thread, since we
diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
index 2fdbc0fba4..5677ba92ca 100644
--- a/qapi/qmp-dispatch.c
+++ b/qapi/qmp-dispatch.c
@@ -154,11 +154,11 @@ QDict *qmp_dispatch(const QmpCommandList *cmds, QObject 
*request,
 }
 
 assert(monitor_cur() == NULL);
-monitor_set_cur(cur_mon);
+monitor_set_cur(qemu_coroutine_self(), cur_mon);
 
 cmd->fn(args, , );
 
-monitor_set_cur(NULL);
+monitor_set_cur(qemu_coroutine_self(), NULL);
 qobject_unref(args);
 if (err) {
 /* or assert(!ret) after reviewing all handlers: */
diff --git a/stubs/monitor-core.c b/stubs/monitor-core.c
index dc1748bf13..d058a2a00d 100644
--- a/stubs/monitor-core.c
+++ b/stubs/monitor-core.c
@@ -8,7 +8,7 @@ Monitor *monitor_cur(void)
 return NULL;
 }
 
-Monitor *monitor_set_cur(Monitor *mon)
+Monitor *monitor_set_cur(Coroutine *co, Monitor *mon)
 {
 return NULL;
 }
-- 
2.25.4




[PATCH v8 06/14] qmp: Call monitor_set_cur() only in qmp_dispatch()

2020-10-05 Thread Kevin Wolf
The correct way to set the current monitor for a coroutine handler will
be different than for a blocking handler, so monitor_set_cur() needs to
be called in qmp_dispatch().

Signed-off-by: Kevin Wolf 
---
 include/qapi/qmp/dispatch.h | 3 ++-
 monitor/qmp.c   | 9 ++---
 qapi/qmp-dispatch.c | 8 +++-
 qga/main.c  | 2 +-
 stubs/monitor-core.c| 5 +
 tests/test-qmp-cmds.c   | 6 +++---
 6 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/include/qapi/qmp/dispatch.h b/include/qapi/qmp/dispatch.h
index 5a9cf82472..0c2f467028 100644
--- a/include/qapi/qmp/dispatch.h
+++ b/include/qapi/qmp/dispatch.h
@@ -14,6 +14,7 @@
 #ifndef QAPI_QMP_DISPATCH_H
 #define QAPI_QMP_DISPATCH_H
 
+#include "monitor/monitor.h"
 #include "qemu/queue.h"
 
 typedef void (QmpCommandFunc)(QDict *, QObject **, Error **);
@@ -49,7 +50,7 @@ const char *qmp_command_name(const QmpCommand *cmd);
 bool qmp_has_success_response(const QmpCommand *cmd);
 QDict *qmp_error_response(Error *err);
 QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
-bool allow_oob);
+bool allow_oob, Monitor *cur_mon);
 bool qmp_is_oob(const QDict *dict);
 
 typedef void (*qmp_cmd_callback_fn)(const QmpCommand *cmd, void *opaque);
diff --git a/monitor/qmp.c b/monitor/qmp.c
index 8469970c69..e746b3575d 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -135,16 +135,11 @@ static void monitor_qmp_respond(MonitorQMP *mon, QDict 
*rsp)
 
 static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
 {
-Monitor *old_mon;
 QDict *rsp;
 QDict *error;
 
-old_mon = monitor_set_cur(>common);
-assert(old_mon == NULL);
-
-rsp = qmp_dispatch(mon->commands, req, qmp_oob_enabled(mon));
-
-monitor_set_cur(NULL);
+rsp = qmp_dispatch(mon->commands, req, qmp_oob_enabled(mon),
+   >common);
 
 if (mon->commands == _cap_negotiation_commands) {
 error = qdict_get_qdict(rsp, "error");
diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
index 79347e0864..2fdbc0fba4 100644
--- a/qapi/qmp-dispatch.c
+++ b/qapi/qmp-dispatch.c
@@ -89,7 +89,7 @@ bool qmp_is_oob(const QDict *dict)
 }
 
 QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
-bool allow_oob)
+bool allow_oob, Monitor *cur_mon)
 {
 Error *err = NULL;
 bool oob;
@@ -152,7 +152,13 @@ QDict *qmp_dispatch(const QmpCommandList *cmds, QObject 
*request,
 args = qdict_get_qdict(dict, "arguments");
 qobject_ref(args);
 }
+
+assert(monitor_cur() == NULL);
+monitor_set_cur(cur_mon);
+
 cmd->fn(args, , );
+
+monitor_set_cur(NULL);
 qobject_unref(args);
 if (err) {
 /* or assert(!ret) after reviewing all handlers: */
diff --git a/qga/main.c b/qga/main.c
index 740f5f7303..dea6a3aa64 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -579,7 +579,7 @@ static void process_event(void *opaque, QObject *obj, Error 
*err)
 }
 
 g_debug("processing command");
-rsp = qmp_dispatch(_commands, obj, false);
+rsp = qmp_dispatch(_commands, obj, false, NULL);
 
 end:
 ret = send_response(s, rsp);
diff --git a/stubs/monitor-core.c b/stubs/monitor-core.c
index 0cd2d864b2..dc1748bf13 100644
--- a/stubs/monitor-core.c
+++ b/stubs/monitor-core.c
@@ -8,6 +8,11 @@ Monitor *monitor_cur(void)
 return NULL;
 }
 
+Monitor *monitor_set_cur(Monitor *mon)
+{
+return NULL;
+}
+
 void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
 {
 }
diff --git a/tests/test-qmp-cmds.c b/tests/test-qmp-cmds.c
index d12ff47e26..5f1b245e19 100644
--- a/tests/test-qmp-cmds.c
+++ b/tests/test-qmp-cmds.c
@@ -152,7 +152,7 @@ static QObject *do_qmp_dispatch(bool allow_oob, const char 
*template, ...)
 req = qdict_from_vjsonf_nofail(template, ap);
 va_end(ap);
 
-resp = qmp_dispatch(_commands, QOBJECT(req), allow_oob);
+resp = qmp_dispatch(_commands, QOBJECT(req), allow_oob, NULL);
 g_assert(resp);
 ret = qdict_get(resp, "return");
 g_assert(ret);
@@ -175,7 +175,7 @@ static void do_qmp_dispatch_error(bool allow_oob, 
ErrorClass cls,
 req = qdict_from_vjsonf_nofail(template, ap);
 va_end(ap);
 
-resp = qmp_dispatch(_commands, QOBJECT(req), allow_oob);
+resp = qmp_dispatch(_commands, QOBJECT(req), allow_oob, NULL);
 g_assert(resp);
 error = qdict_get_qdict(resp, "error");
 g_assert(error);
@@ -231,7 +231,7 @@ static void test_dispatch_cmd_success_response(void)
 QDict *resp;
 
 qdict_put_str(req, "execute", "cmd-success-response");
-resp = qmp_dispatch(_commands, QOBJECT(req), false);
+resp = qmp_dispatch(_commands, QOBJECT(req), false, NULL);
 g_assert_null(resp);
 qobject_unref(req);
 }
-- 
2.25.4




[PATCH v8 03/14] monitor: Use getter/setter functions for cur_mon

2020-10-05 Thread Kevin Wolf
cur_mon really needs to be coroutine-local as soon as we move monitor
command handlers to coroutines and let them yield. As a first step, just
remove all direct accesses to cur_mon so that we can implement this in
the getter function later.

Signed-off-by: Kevin Wolf 
---
 include/monitor/monitor.h  |  3 ++-
 audio/wavcapture.c |  8 
 dump/dump.c|  2 +-
 hw/scsi/vhost-scsi.c   |  2 +-
 hw/virtio/vhost-vsock.c|  2 +-
 migration/fd.c |  4 ++--
 monitor/hmp.c  | 11 +--
 monitor/misc.c | 13 +++--
 monitor/monitor.c  | 24 +++-
 monitor/qmp-cmds-control.c |  2 ++
 monitor/qmp-cmds.c |  2 +-
 monitor/qmp.c  |  7 ++-
 net/socket.c   |  2 +-
 net/tap.c  |  6 +++---
 softmmu/cpus.c |  2 +-
 stubs/monitor-core.c   |  5 -
 tests/test-util-sockets.c  | 12 ++--
 trace/control.c|  2 +-
 util/qemu-error.c  |  6 +++---
 util/qemu-print.c  |  3 ++-
 util/qemu-sockets.c|  1 +
 21 files changed, 73 insertions(+), 46 deletions(-)

diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index 93bedf0b75..543eafcb76 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -5,7 +5,6 @@
 #include "qapi/qapi-types-misc.h"
 #include "qemu/readline.h"
 
-extern __thread Monitor *cur_mon;
 typedef struct MonitorHMP MonitorHMP;
 typedef struct MonitorOptions MonitorOptions;
 
@@ -13,6 +12,8 @@ typedef struct MonitorOptions MonitorOptions;
 
 extern QemuOptsList qemu_mon_opts;
 
+Monitor *monitor_cur(void);
+Monitor *monitor_set_cur(Monitor *mon);
 bool monitor_cur_is_qmp(void);
 
 void monitor_init_globals(void);
diff --git a/audio/wavcapture.c b/audio/wavcapture.c
index 17e87ed6f4..c60286e162 100644
--- a/audio/wavcapture.c
+++ b/audio/wavcapture.c
@@ -1,5 +1,5 @@
 #include "qemu/osdep.h"
-#include "monitor/monitor.h"
+#include "qemu/qemu-print.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "audio.h"
@@ -94,9 +94,9 @@ static void wav_capture_info (void *opaque)
 WAVState *wav = opaque;
 char *path = wav->path;
 
-monitor_printf (cur_mon, "Capturing audio(%d,%d,%d) to %s: %d bytes\n",
-wav->freq, wav->bits, wav->nchannels,
-path ? path : "", wav->bytes);
+qemu_printf("Capturing audio(%d,%d,%d) to %s: %d bytes\n",
+wav->freq, wav->bits, wav->nchannels,
+path ? path : "", wav->bytes);
 }
 
 static struct capture_ops wav_capture_ops = {
diff --git a/dump/dump.c b/dump/dump.c
index 45da46a952..dec32468d9 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -1986,7 +1986,7 @@ void qmp_dump_guest_memory(bool paging, const char *file,
 
 #if !defined(WIN32)
 if (strstart(file, "fd:", )) {
-fd = monitor_get_fd(cur_mon, p, errp);
+fd = monitor_get_fd(monitor_cur(), p, errp);
 if (fd == -1) {
 return;
 }
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index a83ffeefc8..4d70fa036b 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -177,7 +177,7 @@ static void vhost_scsi_realize(DeviceState *dev, Error 
**errp)
 }
 
 if (vs->conf.vhostfd) {
-vhostfd = monitor_fd_param(cur_mon, vs->conf.vhostfd, errp);
+vhostfd = monitor_fd_param(monitor_cur(), vs->conf.vhostfd, errp);
 if (vhostfd == -1) {
 error_prepend(errp, "vhost-scsi: unable to parse vhostfd: ");
 return;
diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c
index c8f0699b4f..f9db4beb47 100644
--- a/hw/virtio/vhost-vsock.c
+++ b/hw/virtio/vhost-vsock.c
@@ -143,7 +143,7 @@ static void vhost_vsock_device_realize(DeviceState *dev, 
Error **errp)
 }
 
 if (vsock->conf.vhostfd) {
-vhostfd = monitor_fd_param(cur_mon, vsock->conf.vhostfd, errp);
+vhostfd = monitor_fd_param(monitor_cur(), vsock->conf.vhostfd, errp);
 if (vhostfd == -1) {
 error_prepend(errp, "vhost-vsock: unable to parse vhostfd: ");
 return;
diff --git a/migration/fd.c b/migration/fd.c
index 0a29ecdebf..6f2f50475f 100644
--- a/migration/fd.c
+++ b/migration/fd.c
@@ -26,7 +26,7 @@
 void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error 
**errp)
 {
 QIOChannel *ioc;
-int fd = monitor_get_fd(cur_mon, fdname, errp);
+int fd = monitor_get_fd(monitor_cur(), fdname, errp);
 if (fd == -1) {
 return;
 }
@@ -55,7 +55,7 @@ static gboolean fd_accept_incoming_migration(QIOChannel *ioc,
 void fd_start_incoming_migration(const char *fdname, Error **errp)
 {
 QIOChannel *ioc;
-int fd = monitor_fd_param(cur_mon, fdname, errp);
+int fd = monitor_fd_param(monitor_cur(), fdname, errp);
 if (fd == -1) {
 return;
 }
diff --git a/monitor/hmp.c b/monitor/hmp.c
index 4ecdefd705..e0cc9e65dd 100644
--- a/monitor/hmp.c
+++ 

[PATCH v8 00/14] monitor: Optionally run handlers in coroutines

2020-10-05 Thread Kevin Wolf
Some QMP command handlers can block the main loop for a relatively long
time, for example because they perform some I/O. This is quite nasty.
Allowing such handlers to run in a coroutine where they can yield (and
therefore release the BQL) while waiting for an event such as I/O
completion solves the problem.

This series adds the infrastructure to allow this and switches
block_resize to run in a coroutine as a first example.

This is an alternative solution to Marc-André's "monitor: add
asynchronous command type" series.

v8:
- Replaced bdrv_co_move_to_aio_context() with bdrv_co_enter()/leave(),
  added bdrv_co_lock()/unlock() and fixed the iothread support in
  qmp_block_resize() with the new functions [Stefan]
- More detailed documentation of 'coroutine' flag for commands [Markus]
- Improved comment for dropping out of coroutine context in
  qmp_dispatch() [Markus]
- Coding style changes [Markus]
- Resolve some more merge conflicts on rebase

v7:
- Added patch 2 to add a Monitor parameter to monitor_get_cpu_index(),
  too [Markus]
- Let monitor_set_cur() return the old monitor [Markus]
- Fixed comment about linking stub objects in test-util-sockets [Markus]
- More detailed commit message for per-coroutine current monitor and
  document that monitor_set_cur(NULL) must be called eventually [Markus]
- Resolve some merge conflicts on rebase

v6:
- Fixed cur_mon behaviour: It should always point to the Monitor while
  we're in the handler coroutine, but be NULL while the handler
  coroutines has yielded. [Markus]
- Give HMP handlers the coroutine option, too, because they will call
  QMP handlers, and life is easier when we know whether we are in
  coroutine context or not.
- Fixed block_resize for block devices in iothreads and for HMP
- Resolved some merge conflict with QAPI generator and monitor
  refactorings that were merged in the meantime

v5:
- Improved comments and documentation [Markus]

v4:
- Forbid 'oob': true, 'coroutine': true [Markus]
- Removed Python type hints [Markus]
- Introduced separate bool qmp_dispatcher_co_shutdown to make it clearer
  how a shutdown request is signalled to the dispatcher [Markus]
- Allow using aio_poll() with iohandler_ctx and use that instead of
  aio_bh_poll() [Markus]
- Removed coroutine_fn from qmp_block_resize() again because at least
  one caller (HMP) calls it outside of coroutine context [Markus]
- Tried to make the synchronisation between dispatcher and the monitor
  thread clearer, and fixed a race condition
- Improved documentation and comments

v3:
- Fix race between monitor thread and dispatcher that could schedule the
  dispatcher coroutine twice if a second requests comes in before the
  dispatcher can wake up [Patchew]

v2:
- Fix typo in a commit message [Eric]
- Use hyphen instead of underscore for the test command [Eric]
- Mark qmp_block_resize() as coroutine_fn [Stefan]


Kevin Wolf (14):
  monitor: Add Monitor parameter to monitor_set_cpu()
  monitor: Add Monitor parameter to monitor_get_cpu_index()
  monitor: Use getter/setter functions for cur_mon
  hmp: Update current monitor only in handle_hmp_command()
  qmp: Assert that no other monitor is active
  qmp: Call monitor_set_cur() only in qmp_dispatch()
  monitor: Make current monitor a per-coroutine property
  qapi: Add a 'coroutine' flag for commands
  qmp: Move dispatcher to a coroutine
  hmp: Add support for coroutine command handlers
  util/async: Add aio_co_reschedule_self()
  block: Add bdrv_co_enter()/leave()
  block: Add bdrv_lock()/unlock()
  block: Convert 'block_resize' to coroutine

 qapi/block-core.json|   3 +-
 docs/devel/qapi-code-gen.txt|  29 ++
 docs/sphinx/qapidoc.py  |   2 +-
 include/block/aio.h |  10 ++
 include/block/block.h   |  31 ++
 include/monitor/monitor.h   |   7 +-
 include/qapi/qmp/dispatch.h |   5 +-
 monitor/monitor-internal.h  |   7 +-
 audio/wavcapture.c  |   8 +-
 block.c |  50 +
 blockdev.c  |  16 +--
 dump/dump.c |   2 +-
 hw/core/machine-hmp-cmds.c  |   2 +-
 hw/scsi/vhost-scsi.c|   2 +-
 hw/virtio/vhost-vsock.c |   2 +-
 migration/fd.c  |   4 +-
 monitor/hmp-cmds.c  |   4 +-
 monitor/hmp.c   |  44 ++--
 monitor/misc.c  |  38 +++
 monitor/monitor.c   | 101 --
 monitor/qmp-cmds-control.c  |   2 +
 monitor/qmp-cmds.c  |   2 +-
 monitor/qmp.c   | 131 +---
 net/socket.c|   2 +-
 net/tap.c   |   6 +-
 qapi/qmp-dispatch.c |  65 +++-
 qapi/qmp-registry.c |   3 +
 

[PATCH v8 02/14] monitor: Add Monitor parameter to monitor_get_cpu_index()

2020-10-05 Thread Kevin Wolf
Most callers actually don't have to rely on cur_mon, but already know
for which monitor they call monitor_get_cpu_index().

Signed-off-by: Kevin Wolf 
---
 include/monitor/monitor.h  |  2 +-
 hw/core/machine-hmp-cmds.c |  2 +-
 monitor/hmp-cmds.c |  2 +-
 monitor/misc.c | 20 ++--
 softmmu/cpus.c |  2 +-
 5 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index 04f472ac4f..93bedf0b75 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -34,7 +34,7 @@ int monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
 int monitor_printf(Monitor *mon, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
 void monitor_flush(Monitor *mon);
 int monitor_set_cpu(Monitor *mon, int cpu_index);
-int monitor_get_cpu_index(void);
+int monitor_get_cpu_index(Monitor *mon);
 
 void monitor_read_command(MonitorHMP *mon, int show_prompt);
 int monitor_read_password(MonitorHMP *mon, ReadLineFunc *readline_func,
diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c
index f4092b98cc..6357be9c6b 100644
--- a/hw/core/machine-hmp-cmds.c
+++ b/hw/core/machine-hmp-cmds.c
@@ -34,7 +34,7 @@ void hmp_info_cpus(Monitor *mon, const QDict *qdict)
 for (cpu = cpu_list; cpu; cpu = cpu->next) {
 int active = ' ';
 
-if (cpu->value->cpu_index == monitor_get_cpu_index()) {
+if (cpu->value->cpu_index == monitor_get_cpu_index(mon)) {
 active = '*';
 }
 
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 0c0a03f824..9789f4277f 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1009,7 +1009,7 @@ void hmp_memsave(Monitor *mon, const QDict *qdict)
 const char *filename = qdict_get_str(qdict, "filename");
 uint64_t addr = qdict_get_int(qdict, "val");
 Error *err = NULL;
-int cpu_index = monitor_get_cpu_index();
+int cpu_index = monitor_get_cpu_index(mon);
 
 if (cpu_index < 0) {
 monitor_printf(mon, "No CPU available\n");
diff --git a/monitor/misc.c b/monitor/misc.c
index 25b42593cc..4ea575eea8 100644
--- a/monitor/misc.c
+++ b/monitor/misc.c
@@ -269,23 +269,23 @@ int monitor_set_cpu(Monitor *mon, int cpu_index)
 }
 
 /* Callers must hold BQL. */
-static CPUState *mon_get_cpu_sync(bool synchronize)
+static CPUState *mon_get_cpu_sync(Monitor *mon, bool synchronize)
 {
 CPUState *cpu = NULL;
 
-if (cur_mon->mon_cpu_path) {
-cpu = (CPUState *) object_resolve_path_type(cur_mon->mon_cpu_path,
+if (mon->mon_cpu_path) {
+cpu = (CPUState *) object_resolve_path_type(mon->mon_cpu_path,
 TYPE_CPU, NULL);
 if (!cpu) {
-g_free(cur_mon->mon_cpu_path);
-cur_mon->mon_cpu_path = NULL;
+g_free(mon->mon_cpu_path);
+mon->mon_cpu_path = NULL;
 }
 }
-if (!cur_mon->mon_cpu_path) {
+if (!mon->mon_cpu_path) {
 if (!first_cpu) {
 return NULL;
 }
-monitor_set_cpu(cur_mon, first_cpu->cpu_index);
+monitor_set_cpu(mon, first_cpu->cpu_index);
 cpu = first_cpu;
 }
 assert(cpu != NULL);
@@ -297,7 +297,7 @@ static CPUState *mon_get_cpu_sync(bool synchronize)
 
 CPUState *mon_get_cpu(void)
 {
-return mon_get_cpu_sync(true);
+return mon_get_cpu_sync(cur_mon, true);
 }
 
 CPUArchState *mon_get_cpu_env(void)
@@ -307,9 +307,9 @@ CPUArchState *mon_get_cpu_env(void)
 return cs ? cs->env_ptr : NULL;
 }
 
-int monitor_get_cpu_index(void)
+int monitor_get_cpu_index(Monitor *mon)
 {
-CPUState *cs = mon_get_cpu_sync(false);
+CPUState *cs = mon_get_cpu_sync(mon, false);
 
 return cs ? cs->cpu_index : UNASSIGNED_CPU_INDEX;
 }
diff --git a/softmmu/cpus.c b/softmmu/cpus.c
index ac8940d52e..12e54f9010 100644
--- a/softmmu/cpus.c
+++ b/softmmu/cpus.c
@@ -2224,7 +2224,7 @@ exit:
 
 void qmp_inject_nmi(Error **errp)
 {
-nmi_monitor_handle(monitor_get_cpu_index(), errp);
+nmi_monitor_handle(monitor_get_cpu_index(cur_mon), errp);
 }
 
 void dump_drift_info(void)
-- 
2.25.4




[PATCH v8 09/14] qmp: Move dispatcher to a coroutine

2020-10-05 Thread Kevin Wolf
This moves the QMP dispatcher to a coroutine and runs all QMP command
handlers that declare 'coroutine': true in coroutine context so they
can avoid blocking the main loop while doing I/O or waiting for other
events.

For commands that are not declared safe to run in a coroutine, the
dispatcher drops out of coroutine context by calling the QMP command
handler from a bottom half.

Signed-off-by: Kevin Wolf 
Reviewed-by: Markus Armbruster 
---
 include/qapi/qmp/dispatch.h |   1 +
 monitor/monitor-internal.h  |   6 +-
 monitor/monitor.c   |  55 +---
 monitor/qmp.c   | 122 +++-
 qapi/qmp-dispatch.c |  65 +--
 qapi/qmp-registry.c |   3 +
 util/aio-posix.c|   8 ++-
 7 files changed, 214 insertions(+), 46 deletions(-)

diff --git a/include/qapi/qmp/dispatch.h b/include/qapi/qmp/dispatch.h
index 9fd2b720a7..af8d96c570 100644
--- a/include/qapi/qmp/dispatch.h
+++ b/include/qapi/qmp/dispatch.h
@@ -31,6 +31,7 @@ typedef enum QmpCommandOptions
 typedef struct QmpCommand
 {
 const char *name;
+/* Runs in coroutine context if QCO_COROUTINE is set */
 QmpCommandFunc *fn;
 QmpCommandOptions options;
 QTAILQ_ENTRY(QmpCommand) node;
diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h
index b39e03b744..b55d6df07f 100644
--- a/monitor/monitor-internal.h
+++ b/monitor/monitor-internal.h
@@ -155,7 +155,9 @@ static inline bool monitor_is_qmp(const Monitor *mon)
 
 typedef QTAILQ_HEAD(MonitorList, Monitor) MonitorList;
 extern IOThread *mon_iothread;
-extern QEMUBH *qmp_dispatcher_bh;
+extern Coroutine *qmp_dispatcher_co;
+extern bool qmp_dispatcher_co_shutdown;
+extern bool qmp_dispatcher_co_busy;
 extern QmpCommandList qmp_commands, qmp_cap_negotiation_commands;
 extern QemuMutex monitor_lock;
 extern MonitorList mon_list;
@@ -173,7 +175,7 @@ void monitor_fdsets_cleanup(void);
 
 void qmp_send_response(MonitorQMP *mon, const QDict *rsp);
 void monitor_data_destroy_qmp(MonitorQMP *mon);
-void monitor_qmp_bh_dispatcher(void *data);
+void coroutine_fn monitor_qmp_dispatcher_co(void *data);
 
 int get_monitor_def(int64_t *pval, const char *name);
 void help_cmd(Monitor *mon, const char *name);
diff --git a/monitor/monitor.c b/monitor/monitor.c
index ef68ca9d21..ceffe1a83b 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -55,8 +55,32 @@ typedef struct {
 /* Shared monitor I/O thread */
 IOThread *mon_iothread;
 
-/* Bottom half to dispatch the requests received from I/O thread */
-QEMUBH *qmp_dispatcher_bh;
+/* Coroutine to dispatch the requests received from I/O thread */
+Coroutine *qmp_dispatcher_co;
+
+/* Set to true when the dispatcher coroutine should terminate */
+bool qmp_dispatcher_co_shutdown;
+
+/*
+ * qmp_dispatcher_co_busy is used for synchronisation between the
+ * monitor thread and the main thread to ensure that the dispatcher
+ * coroutine never gets scheduled a second time when it's already
+ * scheduled (scheduling the same coroutine twice is forbidden).
+ *
+ * It is true if the coroutine is active and processing requests.
+ * Additional requests may then be pushed onto mon->qmp_requests,
+ * and @qmp_dispatcher_co_shutdown may be set without further ado.
+ * @qmp_dispatcher_co_busy must not be woken up in this case.
+ *
+ * If false, you also have to set @qmp_dispatcher_co_busy to true and
+ * wake up @qmp_dispatcher_co after pushing the new requests.
+ *
+ * The coroutine will automatically change this variable back to false
+ * before it yields.  Nobody else may set the variable to false.
+ *
+ * Access must be atomic for thread safety.
+ */
+bool qmp_dispatcher_co_busy;
 
 /*
  * Protects mon_list, monitor_qapi_event_state, coroutine_mon,
@@ -623,9 +647,24 @@ void monitor_cleanup(void)
 }
 qemu_mutex_unlock(_lock);
 
-/* QEMUBHs needs to be deleted before destroying the I/O thread */
-qemu_bh_delete(qmp_dispatcher_bh);
-qmp_dispatcher_bh = NULL;
+/*
+ * The dispatcher needs to stop before destroying the I/O thread.
+ *
+ * We need to poll both qemu_aio_context and iohandler_ctx to make
+ * sure that the dispatcher coroutine keeps making progress and
+ * eventually terminates.  qemu_aio_context is automatically
+ * polled by calling AIO_WAIT_WHILE on it, but we must poll
+ * iohandler_ctx manually.
+ */
+qmp_dispatcher_co_shutdown = true;
+if (!qatomic_xchg(_dispatcher_co_busy, true)) {
+aio_co_wake(qmp_dispatcher_co);
+}
+
+AIO_WAIT_WHILE(qemu_get_aio_context(),
+   (aio_poll(iohandler_get_aio_context(), false),
+qatomic_mb_read(_dispatcher_co_busy)));
+
 if (mon_iothread) {
 iothread_destroy(mon_iothread);
 mon_iothread = NULL;
@@ -649,9 +688,9 @@ void monitor_init_globals_core(void)
  * have commands assuming that context.  It would be nice to get
  * rid of those assumptions.
  */
-qmp_dispatcher_bh = 

[PULL v2 13/17] block/io: refactor save/load vmstate

2020-10-05 Thread Stefan Hajnoczi
From: Vladimir Sementsov-Ogievskiy 

Like for read/write in a previous commit, drop extra indirection layer,
generate directly bdrv_readv_vmstate() and bdrv_writev_vmstate().

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
Reviewed-by: Stefan Hajnoczi 
Message-Id: <20200924185414.28642-8-vsement...@virtuozzo.com>
---
 block/coroutines.h| 10 +++
 include/block/block.h |  6 ++--
 block/io.c| 70 ++-
 3 files changed, 44 insertions(+), 42 deletions(-)

diff --git a/block/coroutines.h b/block/coroutines.h
index 6c63a819c9..f69179f5ef 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -57,11 +57,9 @@ bdrv_common_block_status_above(BlockDriverState *bs,
int64_t *map,
BlockDriverState **file);
 
-int coroutine_fn
-bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
-   bool is_read);
-int generated_co_wrapper
-bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
-bool is_read);
+int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
+   QEMUIOVector *qiov, int64_t pos);
+int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs,
+QEMUIOVector *qiov, int64_t pos);
 
 #endif /* BLOCK_COROUTINES_INT_H */
diff --git a/include/block/block.h b/include/block/block.h
index eef4cceaf0..8b87df69a1 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -572,8 +572,10 @@ int path_has_protocol(const char *path);
 int path_is_absolute(const char *path);
 char *path_combine(const char *base_path, const char *filename);
 
-int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
-int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+int generated_co_wrapper
+bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+int generated_co_wrapper
+bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
   int64_t pos, int size);
 
diff --git a/block/io.c b/block/io.c
index c3dc1db036..54f0968aee 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2475,28 +2475,50 @@ int bdrv_is_allocated_above(BlockDriverState *top,
 }
 
 int coroutine_fn
-bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
-   bool is_read)
+bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
 {
 BlockDriver *drv = bs->drv;
 BlockDriverState *child_bs = bdrv_primary_bs(bs);
 int ret = -ENOTSUP;
 
+if (!drv) {
+return -ENOMEDIUM;
+}
+
 bdrv_inc_in_flight(bs);
 
+if (drv->bdrv_load_vmstate) {
+ret = drv->bdrv_load_vmstate(bs, qiov, pos);
+} else if (child_bs) {
+ret = bdrv_co_readv_vmstate(child_bs, qiov, pos);
+}
+
+bdrv_dec_in_flight(bs);
+
+return ret;
+}
+
+int coroutine_fn
+bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
+{
+BlockDriver *drv = bs->drv;
+BlockDriverState *child_bs = bdrv_primary_bs(bs);
+int ret = -ENOTSUP;
+
 if (!drv) {
-ret = -ENOMEDIUM;
-} else if (drv->bdrv_load_vmstate) {
-if (is_read) {
-ret = drv->bdrv_load_vmstate(bs, qiov, pos);
-} else {
-ret = drv->bdrv_save_vmstate(bs, qiov, pos);
-}
+return -ENOMEDIUM;
+}
+
+bdrv_inc_in_flight(bs);
+
+if (drv->bdrv_save_vmstate) {
+ret = drv->bdrv_save_vmstate(bs, qiov, pos);
 } else if (child_bs) {
-ret = bdrv_co_rw_vmstate(child_bs, qiov, pos, is_read);
+ret = bdrv_co_writev_vmstate(child_bs, qiov, pos);
 }
 
 bdrv_dec_in_flight(bs);
+
 return ret;
 }
 
@@ -2504,38 +2526,18 @@ int bdrv_save_vmstate(BlockDriverState *bs, const 
uint8_t *buf,
   int64_t pos, int size)
 {
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
-int ret;
+int ret = bdrv_writev_vmstate(bs, , pos);
 
-ret = bdrv_writev_vmstate(bs, , pos);
-if (ret < 0) {
-return ret;
-}
-
-return size;
-}
-
-int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
-{
-return bdrv_rw_vmstate(bs, qiov, pos, false);
+return ret < 0 ? ret : size;
 }
 
 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
   int64_t pos, int size)
 {
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
-int ret;
+int ret = bdrv_readv_vmstate(bs, , pos);
 
-ret = bdrv_readv_vmstate(bs, , pos);
-if (ret < 0) {
-return ret;
-}
-
-return size;
-}
-
-int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
-{
-return bdrv_rw_vmstate(bs, qiov, pos, true);
+return ret < 0 ? ret : 

[PATCH v8 05/14] qmp: Assert that no other monitor is active

2020-10-05 Thread Kevin Wolf
monitor_qmp_dispatch() is never supposed to be called in the context of
another monitor, so assert that monitor_cur() is NULL instead of saving
and restoring it.

Signed-off-by: Kevin Wolf 
---
 monitor/qmp.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/monitor/qmp.c b/monitor/qmp.c
index bb2d9d0cc7..8469970c69 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -140,8 +140,11 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject 
*req)
 QDict *error;
 
 old_mon = monitor_set_cur(>common);
+assert(old_mon == NULL);
+
 rsp = qmp_dispatch(mon->commands, req, qmp_oob_enabled(mon));
-monitor_set_cur(old_mon);
+
+monitor_set_cur(NULL);
 
 if (mon->commands == _cap_negotiation_commands) {
 error = qdict_get_qdict(rsp, "error");
-- 
2.25.4




[PATCH v8 01/14] monitor: Add Monitor parameter to monitor_set_cpu()

2020-10-05 Thread Kevin Wolf
Most callers actually don't have to rely on cur_mon, but already know
for which monitor they call monitor_set_cpu().

Signed-off-by: Kevin Wolf 
Reviewed-by: Eric Blake 
---
 include/monitor/monitor.h |  2 +-
 monitor/hmp-cmds.c|  2 +-
 monitor/misc.c| 10 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index c0170773d4..04f472ac4f 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -33,7 +33,7 @@ int monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
 GCC_FMT_ATTR(2, 0);
 int monitor_printf(Monitor *mon, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
 void monitor_flush(Monitor *mon);
-int monitor_set_cpu(int cpu_index);
+int monitor_set_cpu(Monitor *mon, int cpu_index);
 int monitor_get_cpu_index(void);
 
 void monitor_read_command(MonitorHMP *mon, int show_prompt);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index dc0de39219..0c0a03f824 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -998,7 +998,7 @@ void hmp_cpu(Monitor *mon, const QDict *qdict)
 /* XXX: drop the monitor_set_cpu() usage when all HMP commands that
 use it are converted to the QAPI */
 cpu_index = qdict_get_int(qdict, "index");
-if (monitor_set_cpu(cpu_index) < 0) {
+if (monitor_set_cpu(mon, cpu_index) < 0) {
 monitor_printf(mon, "invalid CPU index\n");
 }
 }
diff --git a/monitor/misc.c b/monitor/misc.c
index 6e0da0cb96..25b42593cc 100644
--- a/monitor/misc.c
+++ b/monitor/misc.c
@@ -129,7 +129,7 @@ char *qmp_human_monitor_command(const char *command_line, 
bool has_cpu_index,
 cur_mon = 
 
 if (has_cpu_index) {
-int ret = monitor_set_cpu(cpu_index);
+int ret = monitor_set_cpu(, cpu_index);
 if (ret < 0) {
 cur_mon = old_mon;
 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
@@ -255,7 +255,7 @@ static void monitor_init_qmp_commands(void)
 }
 
 /* Set the current CPU defined by the user. Callers must hold BQL. */
-int monitor_set_cpu(int cpu_index)
+int monitor_set_cpu(Monitor *mon, int cpu_index)
 {
 CPUState *cpu;
 
@@ -263,8 +263,8 @@ int monitor_set_cpu(int cpu_index)
 if (cpu == NULL) {
 return -1;
 }
-g_free(cur_mon->mon_cpu_path);
-cur_mon->mon_cpu_path = object_get_canonical_path(OBJECT(cpu));
+g_free(mon->mon_cpu_path);
+mon->mon_cpu_path = object_get_canonical_path(OBJECT(cpu));
 return 0;
 }
 
@@ -285,7 +285,7 @@ static CPUState *mon_get_cpu_sync(bool synchronize)
 if (!first_cpu) {
 return NULL;
 }
-monitor_set_cpu(first_cpu->cpu_index);
+monitor_set_cpu(cur_mon, first_cpu->cpu_index);
 cpu = first_cpu;
 }
 assert(cpu != NULL);
-- 
2.25.4




[PULL v2 10/17] scripts: add block-coroutine-wrapper.py

2020-10-05 Thread Stefan Hajnoczi
From: Vladimir Sementsov-Ogievskiy 

We have a very frequent pattern of creating a coroutine from a function
with several arguments:

  - create a structure to pack parameters
  - create _entry function to call original function taking parameters
from struct
  - do different magic to handle completion: set ret to NOT_DONE or
EINPROGRESS or use separate bool field
  - fill the struct and create coroutine from _entry function with this
struct as a parameter
  - do coroutine enter and BDRV_POLL_WHILE loop

Let's reduce code duplication by generating coroutine wrappers.

This patch adds scripts/block-coroutine-wrapper.py together with some
friends, which will generate functions with declared prototypes marked
by the 'generated_co_wrapper' specifier.

The usage of new code generation is as follows:

1. define the coroutine function somewhere

int coroutine_fn bdrv_co_NAME(...) {...}

2. declare in some header file

int generated_co_wrapper bdrv_NAME(...);

   with same list of parameters (generated_co_wrapper is
   defined in "include/block/block.h").

3. Make sure the block_gen_c declaration in block/meson.build
   mentions the file with your marker function.

Still, no function is now marked, this work is for the following
commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
Message-Id: <20200924185414.28642-5-vsement...@virtuozzo.com>
[Added encoding='utf-8' to open() calls as requested by Vladimir. Fixed
typo and grammar issues pointed out by Eric Blake. Removed clang-format
dependency that caused build test issues.
--Stefan]
Signed-off-by: Stefan Hajnoczi 
---
 block/block-gen.h  |  49 
 include/block/block.h  |  10 ++
 block/meson.build  |   8 ++
 docs/devel/block-coroutine-wrapper.rst |  54 
 docs/devel/index.rst   |   1 +
 scripts/block-coroutine-wrapper.py | 167 +
 6 files changed, 289 insertions(+)
 create mode 100644 block/block-gen.h
 create mode 100644 docs/devel/block-coroutine-wrapper.rst
 create mode 100644 scripts/block-coroutine-wrapper.py

diff --git a/block/block-gen.h b/block/block-gen.h
new file mode 100644
index 00..f80cf4897d
--- /dev/null
+++ b/block/block-gen.h
@@ -0,0 +1,49 @@
+/*
+ * Block coroutine wrapping core, used by auto-generated block/block-gen.c
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2020 Virtuozzo International GmbH
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef BLOCK_BLOCK_GEN_H
+#define BLOCK_BLOCK_GEN_H
+
+#include "block/block_int.h"
+
+/* Base structure for argument packing structures */
+typedef struct BdrvPollCo {
+BlockDriverState *bs;
+bool in_progress;
+int ret;
+Coroutine *co; /* Keep pointer here for debugging */
+} BdrvPollCo;
+
+static inline int bdrv_poll_co(BdrvPollCo *s)
+{
+assert(!qemu_in_coroutine());
+
+bdrv_coroutine_enter(s->bs, s->co);
+BDRV_POLL_WHILE(s->bs, s->in_progress);
+
+return s->ret;
+}
+
+#endif /* BLOCK_BLOCK_GEN_H */
diff --git a/include/block/block.h b/include/block/block.h
index 81d591dd4c..0f0ddc51b4 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -10,6 +10,16 @@
 #include "block/blockjob.h"
 #include "qemu/hbitmap.h"
 
+/*
+ * generated_co_wrapper
+ *
+ * Function specifier, which does nothing but mark functions to be
+ * generated by scripts/block-coroutine-wrapper.py
+ *
+ * Read more in docs/devel/block-coroutine-wrapper.rst
+ */
+#define generated_co_wrapper
+
 /* block.c */
 typedef struct BlockDriver BlockDriver;
 typedef struct BdrvChild BdrvChild;
diff --git a/block/meson.build b/block/meson.build
index 0b38dc36f7..78e8b25232 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -107,6 +107,14 @@ module_block_h = custom_target('module_block.h',
command: [module_block_py, 

[PULL v2 17/17] util/vfio-helpers: Rework the IOVA allocator to avoid IOVA reserved regions

2020-10-05 Thread Stefan Hajnoczi
From: Eric Auger 

Introduce the qemu_vfio_find_fixed/temp_iova helpers which
respectively allocate IOVAs from the bottom/top parts of the
usable IOVA range, without picking within host IOVA reserved
windows. The allocation remains basic: if the size is too big
for the remaining of the current usable IOVA range, we jump
to the next one, leaving a hole in the address map.

Signed-off-by: Eric Auger 
Message-id: 20200929085550.30926-3-eric.au...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 util/vfio-helpers.c | 57 +
 1 file changed, 53 insertions(+), 4 deletions(-)

diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index fe9ca9ce38..c469beb061 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -667,6 +667,50 @@ static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
 return true;
 }
 
+static int
+qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
+{
+int i;
+
+for (i = 0; i < s->nb_iova_ranges; i++) {
+if (s->usable_iova_ranges[i].end < s->low_water_mark) {
+continue;
+}
+s->low_water_mark =
+MAX(s->low_water_mark, s->usable_iova_ranges[i].start);
+
+if (s->usable_iova_ranges[i].end - s->low_water_mark + 1 >= size ||
+s->usable_iova_ranges[i].end - s->low_water_mark + 1 == 0) {
+*iova = s->low_water_mark;
+s->low_water_mark += size;
+return 0;
+}
+}
+return -ENOMEM;
+}
+
+static int
+qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
+{
+int i;
+
+for (i = s->nb_iova_ranges - 1; i >= 0; i--) {
+if (s->usable_iova_ranges[i].start > s->high_water_mark) {
+continue;
+}
+s->high_water_mark =
+MIN(s->high_water_mark, s->usable_iova_ranges[i].end + 1);
+
+if (s->high_water_mark - s->usable_iova_ranges[i].start + 1 >= size ||
+s->high_water_mark - s->usable_iova_ranges[i].start + 1 == 0) {
+*iova = s->high_water_mark - size;
+s->high_water_mark = *iova;
+return 0;
+}
+}
+return -ENOMEM;
+}
+
 /* Map [host, host + size) area into a contiguous IOVA address space, and store
  * the result in @iova if not NULL. The caller need to make sure the area is
  * aligned to page size, and mustn't overlap with existing mapping areas (split
@@ -693,7 +737,11 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t 
size,
 goto out;
 }
 if (!temporary) {
-iova0 = s->low_water_mark;
+if (qemu_vfio_find_fixed_iova(s, size, )) {
+ret = -ENOMEM;
+goto out;
+}
+
 mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
 if (!mapping) {
 ret = -ENOMEM;
@@ -705,15 +753,16 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, 
size_t size,
 qemu_vfio_undo_mapping(s, mapping, NULL);
 goto out;
 }
-s->low_water_mark += size;
 qemu_vfio_dump_mappings(s);
 } else {
-iova0 = s->high_water_mark - size;
+if (qemu_vfio_find_temp_iova(s, size, )) {
+ret = -ENOMEM;
+goto out;
+}
 ret = qemu_vfio_do_mapping(s, host, size, iova0);
 if (ret) {
 goto out;
 }
-s->high_water_mark -= size;
 }
 }
 if (iova) {
-- 
2.26.2



[PULL v2 14/17] include/block/block.h: drop non-ascii quotation mark

2020-10-05 Thread Stefan Hajnoczi
From: Vladimir Sementsov-Ogievskiy 

This is the only non-ascii character in the file and it doesn't really
needed here. Let's use normal "'" symbol for consistency with the rest
11 occurrences of "'" in the file.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
Signed-off-by: Stefan Hajnoczi 
---
 include/block/block.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/block/block.h b/include/block/block.h
index 8b87df69a1..ce2ac39299 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -304,7 +304,7 @@ enum BdrvChildRoleBits {
 BDRV_CHILD_FILTERED = (1 << 2),
 
 /*
- * Child from which to read all data that isn’t allocated in the
+ * Child from which to read all data that isn't allocated in the
  * parent (i.e., the backing child); such data is copied to the
  * parent through COW (and optionally COR).
  * This field is mutually exclusive with DATA, METADATA, and
-- 
2.26.2



[PULL v2 15/17] docs: add 'io_uring' option to 'aio' param in qemu-options.hx

2020-10-05 Thread Stefan Hajnoczi
From: Stefano Garzarella 

When we added io_uring AIO engine, we forgot to update qemu-options.hx,
so qemu(1) man page and qemu help were outdated.

Signed-off-by: Stefano Garzarella 
Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Julia Suvorova 
Reviewed-by: Pankaj Gupta 
Message-Id: <20200924151511.131471-1-sgarz...@redhat.com>
---
 qemu-options.hx | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/qemu-options.hx b/qemu-options.hx
index 3564c2303f..1da52a269c 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1053,7 +1053,8 @@ SRST
 The path to the image file in the local filesystem
 
 ``aio``
-Specifies the AIO backend (threads/native, default: threads)
+Specifies the AIO backend (threads/native/io_uring,
+default: threads)
 
 ``locking``
 Specifies whether the image file is protected with Linux OFD
@@ -1175,7 +1176,8 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
 "-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n"
 "   
[,cache=writethrough|writeback|none|directsync|unsafe][,format=f]\n"
 "   [,snapshot=on|off][,rerror=ignore|stop|report]\n"
-"   
[,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
+"   [,werror=ignore|stop|report|enospc][,id=name]\n"
+"   [,aio=threads|native|io_uring]\n"
 "   [,readonly=on|off][,copy-on-read=on|off]\n"
 "   [,discard=ignore|unmap][,detect-zeroes=on|off|unmap]\n"
 "   [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
@@ -1247,8 +1249,8 @@ SRST
 The default mode is ``cache=writeback``.
 
 ``aio=aio``
-aio is "threads", or "native" and selects between pthread based
-disk I/O and native Linux AIO.
+aio is "threads", "native", or "io_uring" and selects between pthread
+based disk I/O, native Linux AIO, or Linux io_uring API.
 
 ``format=format``
 Specify which disk format will be used rather than detecting the
-- 
2.26.2



[PULL v2 03/17] block/nvme: Reduce I/O registers scope

2020-10-05 Thread Stefan Hajnoczi
From: Philippe Mathieu-Daudé 

We only access the I/O register in nvme_init().
Remove the reference in BDRVNVMeState and reduce its scope.

Signed-off-by: Philippe Mathieu-Daudé 
Signed-off-by: Stefan Hajnoczi 
Message-Id: <20200922083821.578519-4-phi...@redhat.com>
---
 block/nvme.c | 29 -
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index 3c834da8fe..e517c7539f 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -98,7 +98,6 @@ enum {
 struct BDRVNVMeState {
 AioContext *aio_context;
 QEMUVFIOState *vfio;
-NVMeRegs *regs;
 /* Memory mapped registers */
 volatile struct {
 uint32_t sq_tail;
@@ -695,6 +694,7 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 uint64_t timeout_ms;
 uint64_t deadline, now;
 Error *local_err = NULL;
+NVMeRegs *regs;
 
 qemu_co_mutex_init(>dma_map_lock);
 qemu_co_queue_init(>dma_flush_queue);
@@ -713,16 +713,16 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 goto out;
 }
 
-s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, sizeof(NvmeBar),
-PROT_READ | PROT_WRITE, errp);
-if (!s->regs) {
+regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, sizeof(NvmeBar),
+ PROT_READ | PROT_WRITE, errp);
+if (!regs) {
 ret = -EINVAL;
 goto out;
 }
 /* Perform initialize sequence as described in NVMe spec "7.6.1
  * Initialization". */
 
-cap = le64_to_cpu(s->regs->ctrl.cap);
+cap = le64_to_cpu(regs->ctrl.cap);
 if (!(cap & (1ULL << 37))) {
 error_setg(errp, "Device doesn't support NVMe command set");
 ret = -EINVAL;
@@ -735,10 +735,10 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 3);
 
 /* Reset device to get a clean state. */
-s->regs->ctrl.cc = cpu_to_le32(le32_to_cpu(s->regs->ctrl.cc) & 0xFE);
+regs->ctrl.cc = cpu_to_le32(le32_to_cpu(regs->ctrl.cc) & 0xFE);
 /* Wait for CSTS.RDY = 0. */
 deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
-while (le32_to_cpu(s->regs->ctrl.csts) & 0x1) {
+while (le32_to_cpu(regs->ctrl.csts) & 0x1) {
 if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
 error_setg(errp, "Timeout while waiting for device to reset (%"
  PRId64 " ms)",
@@ -766,18 +766,18 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 }
 s->nr_queues = 1;
 QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
-s->regs->ctrl.aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
-s->regs->ctrl.asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
-s->regs->ctrl.acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
+regs->ctrl.aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
+regs->ctrl.asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
+regs->ctrl.acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
 
 /* After setting up all control registers we can enable device now. */
-s->regs->ctrl.cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
+regs->ctrl.cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
   (ctz32(NVME_SQ_ENTRY_BYTES) << 16) |
   0x1);
 /* Wait for CSTS.RDY = 1. */
 now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
 deadline = now + timeout_ms * 100;
-while (!(le32_to_cpu(s->regs->ctrl.csts) & 0x1)) {
+while (!(le32_to_cpu(regs->ctrl.csts) & 0x1)) {
 if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
 error_setg(errp, "Timeout while waiting for device to start (%"
  PRId64 " ms)",
@@ -808,6 +808,10 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 ret = -EIO;
 }
 out:
+if (regs) {
+qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)regs, 0, sizeof(NvmeBar));
+}
+
 /* Cleaning up is done in nvme_file_open() upon error. */
 return ret;
 }
@@ -882,7 +886,6 @@ static void nvme_close(BlockDriverState *bs)
 event_notifier_cleanup(>irq_notifier[MSIX_SHARED_IRQ_IDX]);
 qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->doorbells,
 sizeof(NvmeBar), NVME_DOORBELL_SIZE);
-qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, sizeof(NvmeBar));
 qemu_vfio_close(s->vfio);
 
 g_free(s->device);
-- 
2.26.2



[PULL v2 12/17] block: drop bdrv_prwv

2020-10-05 Thread Stefan Hajnoczi
From: Vladimir Sementsov-Ogievskiy 

Now that we are not maintaining boilerplate code for coroutine
wrappers, there is no more sense in keeping the extra indirection layer
of bdrv_prwv().  Let's drop it and instead generate pure bdrv_preadv()
and bdrv_pwritev().

Currently, bdrv_pwritev() and bdrv_preadv() are returning bytes on
success, auto generated functions will instead return zero, as their
_co_ prototype. Still, it's simple to make the conversion safe: the
only external user of bdrv_pwritev() is test-bdrv-drain, and it is
comfortable enough with bdrv_co_pwritev() instead. So prototypes are
moved to local block/coroutines.h. Next, the only internal use is
bdrv_pread() and bdrv_pwrite(), which are modified to return bytes on
success.

Of course, it would be great to convert bdrv_pread() and bdrv_pwrite()
to return 0 on success. But this requires audit (and probably
conversion) of all their users, let's leave it for another day
refactoring.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Stefan Hajnoczi 
Message-Id: <20200924185414.28642-7-vsement...@virtuozzo.com>
---
 block/coroutines.h  | 10 -
 include/block/block.h   |  2 --
 block/io.c  | 49 -
 tests/test-bdrv-drain.c |  2 +-
 4 files changed, 15 insertions(+), 48 deletions(-)

diff --git a/block/coroutines.h b/block/coroutines.h
index c62b3a2697..6c63a819c9 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -31,12 +31,12 @@ int coroutine_fn bdrv_co_check(BlockDriverState *bs,
BdrvCheckResult *res, BdrvCheckMode fix);
 int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
 
-int coroutine_fn
-bdrv_co_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
- bool is_write, BdrvRequestFlags flags);
 int generated_co_wrapper
-bdrv_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
-  bool is_write, BdrvRequestFlags flags);
+bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes,
+QEMUIOVector *qiov, BdrvRequestFlags flags);
+int generated_co_wrapper
+bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags);
 
 int coroutine_fn
 bdrv_co_common_block_status_above(BlockDriverState *bs,
diff --git a/include/block/block.h b/include/block/block.h
index f2d85f2cf1..eef4cceaf0 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -383,9 +383,7 @@ int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int bytes, BdrvRequestFlags flags);
 int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
 int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes);
-int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov);
 int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes);
-int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov);
 int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
  const void *buf, int count);
 /*
diff --git a/block/io.c b/block/io.c
index 2891303a8d..c3dc1db036 100644
--- a/block/io.c
+++ b/block/io.c
@@ -890,23 +890,11 @@ static int bdrv_check_byte_request(BlockDriverState *bs, 
int64_t offset,
 return 0;
 }
 
-int coroutine_fn bdrv_co_prwv(BdrvChild *child, int64_t offset,
-  QEMUIOVector *qiov, bool is_write,
-  BdrvRequestFlags flags)
-{
-if (is_write) {
-return bdrv_co_pwritev(child, offset, qiov->size, qiov, flags);
-} else {
-return bdrv_co_preadv(child, offset, qiov->size, qiov, flags);
-}
-}
-
 int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int bytes, BdrvRequestFlags flags)
 {
-QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes);
-
-return bdrv_prwv(child, offset, , true, BDRV_REQ_ZERO_WRITE | flags);
+return bdrv_pwritev(child, offset, bytes, NULL,
+BDRV_REQ_ZERO_WRITE | flags);
 }
 
 /*
@@ -950,41 +938,19 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags 
flags)
 }
 }
 
-/* return < 0 if error. See bdrv_pwrite() for the return codes */
-int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
-{
-int ret;
-
-ret = bdrv_prwv(child, offset, qiov, false, 0);
-if (ret < 0) {
-return ret;
-}
-
-return qiov->size;
-}
-
 /* See bdrv_pwrite() for the return codes */
 int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
 {
+int ret;
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
 
 if (bytes < 0) {
 return -EINVAL;
 }
 
-return bdrv_preadv(child, offset, );
-}
+ret = bdrv_preadv(child, offset, bytes, ,  0);
 
-int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
-{
-int 

[PULL v2 11/17] block: generate coroutine-wrapper code

2020-10-05 Thread Stefan Hajnoczi
From: Vladimir Sementsov-Ogievskiy 

Use code generation implemented in previous commit to generated
coroutine wrappers in block.c and block/io.c

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
Reviewed-by: Stefan Hajnoczi 
Message-Id: <20200924185414.28642-6-vsement...@virtuozzo.com>
---
 block/coroutines.h|   6 +-
 include/block/block.h |  16 ++--
 block.c   |  73 ---
 block/io.c| 212 --
 4 files changed, 13 insertions(+), 294 deletions(-)

diff --git a/block/coroutines.h b/block/coroutines.h
index 9ce1730a09..c62b3a2697 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -34,7 +34,7 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState 
*bs, Error **errp);
 int coroutine_fn
 bdrv_co_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
  bool is_write, BdrvRequestFlags flags);
-int
+int generated_co_wrapper
 bdrv_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
   bool is_write, BdrvRequestFlags flags);
 
@@ -47,7 +47,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
   int64_t *pnum,
   int64_t *map,
   BlockDriverState **file);
-int
+int generated_co_wrapper
 bdrv_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool want_zero,
@@ -60,7 +60,7 @@ bdrv_common_block_status_above(BlockDriverState *bs,
 int coroutine_fn
 bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
bool is_read);
-int
+int generated_co_wrapper
 bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
 bool is_read);
 
diff --git a/include/block/block.h b/include/block/block.h
index 0f0ddc51b4..f2d85f2cf1 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -403,8 +403,9 @@ void bdrv_refresh_filename(BlockDriverState *bs);
 int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
   PreallocMode prealloc, BdrvRequestFlags 
flags,
   Error **errp);
-int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
-  PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
+int generated_co_wrapper
+bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
+  PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
 
 int64_t bdrv_nb_sectors(BlockDriverState *bs);
 int64_t bdrv_getlength(BlockDriverState *bs);
@@ -446,7 +447,8 @@ typedef enum {
 BDRV_FIX_ERRORS   = 2,
 } BdrvCheckMode;
 
-int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
+int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res,
+BdrvCheckMode fix);
 
 /* The units of offset and total_work_size may be chosen arbitrarily by the
  * block driver; total_work_size may change during the course of the amendment
@@ -470,12 +472,13 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb);
 int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
 
 /* Invalidate any cached metadata used by image formats */
-int bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
+int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs,
+   Error **errp);
 void bdrv_invalidate_cache_all(Error **errp);
 int bdrv_inactivate_all(void);
 
 /* Ensure contents are flushed to disk.  */
-int bdrv_flush(BlockDriverState *bs);
+int generated_co_wrapper bdrv_flush(BlockDriverState *bs);
 int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
 int bdrv_flush_all(void);
 void bdrv_close_all(void);
@@ -490,7 +493,8 @@ void bdrv_drain_all(void);
 AIO_WAIT_WHILE(bdrv_get_aio_context(bs_),  \
cond); })
 
-int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
+int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset,
+   int64_t bytes);
 int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
 int bdrv_has_zero_init_1(BlockDriverState *bs);
 int bdrv_has_zero_init(BlockDriverState *bs);
diff --git a/block.c b/block.c
index 324714351c..52b2e2709f 100644
--- a/block.c
+++ b/block.c
@@ -4691,43 +4691,6 @@ int coroutine_fn bdrv_co_check(BlockDriverState *bs,
 return bs->drv->bdrv_co_check(bs, res, fix);
 }
 
-typedef struct CheckCo {
-BlockDriverState *bs;
-BdrvCheckResult *res;
-BdrvCheckMode fix;
-int ret;
-} CheckCo;
-
-static void coroutine_fn bdrv_check_co_entry(void *opaque)
-{
-CheckCo *cco = opaque;
-cco->ret = bdrv_co_check(cco->bs, cco->res, cco->fix);
-aio_wait_kick();
-}
-
-int bdrv_check(BlockDriverState *bs,
-   

[PULL v2 07/17] block: return error-code from bdrv_invalidate_cache

2020-10-05 Thread Stefan Hajnoczi
From: Vladimir Sementsov-Ogievskiy 

This is the only coroutine wrapper from block.c and block/io.c which
doesn't return a value, so let's convert it to the common behavior, to
simplify moving to generated coroutine wrappers in a further commit.

Also, bdrv_invalidate_cache is a void function, returning error only
through **errp parameter, which is considered to be bad practice, as
it forces callers to define and propagate local_err variable, so
conversion is good anyway.

This patch leaves the conversion of .bdrv_co_invalidate_cache() driver
callbacks and bdrv_invalidate_cache_all() for another day.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Stefan Hajnoczi 
Message-Id: <20200924185414.28642-2-vsement...@virtuozzo.com>
---
 include/block/block.h |  2 +-
 block.c   | 32 ++--
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 981ab5b314..81d591dd4c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -460,7 +460,7 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb);
 int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
 
 /* Invalidate any cached metadata used by image formats */
-void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
+int bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
 void bdrv_invalidate_cache_all(Error **errp);
 int bdrv_inactivate_all(void);
 
diff --git a/block.c b/block.c
index f4b6dd5d3d..8de14de49c 100644
--- a/block.c
+++ b/block.c
@@ -5781,8 +5781,8 @@ void bdrv_init_with_whitelist(void)
 bdrv_init();
 }
 
-static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
-  Error **errp)
+static int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
+ Error **errp)
 {
 BdrvChild *child, *parent;
 uint64_t perm, shared_perm;
@@ -5791,14 +5791,14 @@ static void coroutine_fn 
bdrv_co_invalidate_cache(BlockDriverState *bs,
 BdrvDirtyBitmap *bm;
 
 if (!bs->drv)  {
-return;
+return -ENOMEDIUM;
 }
 
 QLIST_FOREACH(child, >children, next) {
 bdrv_co_invalidate_cache(child->bs, _err);
 if (local_err) {
 error_propagate(errp, local_err);
-return;
+return -EINVAL;
 }
 }
 
@@ -5821,7 +5821,7 @@ static void coroutine_fn 
bdrv_co_invalidate_cache(BlockDriverState *bs,
 ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, errp);
 if (ret < 0) {
 bs->open_flags |= BDRV_O_INACTIVE;
-return;
+return ret;
 }
 bdrv_set_perm(bs, perm, shared_perm);
 
@@ -5830,7 +5830,7 @@ static void coroutine_fn 
bdrv_co_invalidate_cache(BlockDriverState *bs,
 if (local_err) {
 bs->open_flags |= BDRV_O_INACTIVE;
 error_propagate(errp, local_err);
-return;
+return -EINVAL;
 }
 }
 
@@ -5842,7 +5842,7 @@ static void coroutine_fn 
bdrv_co_invalidate_cache(BlockDriverState *bs,
 if (ret < 0) {
 bs->open_flags |= BDRV_O_INACTIVE;
 error_setg_errno(errp, -ret, "Could not refresh total sector 
count");
-return;
+return ret;
 }
 }
 
@@ -5852,27 +5852,30 @@ static void coroutine_fn 
bdrv_co_invalidate_cache(BlockDriverState *bs,
 if (local_err) {
 bs->open_flags |= BDRV_O_INACTIVE;
 error_propagate(errp, local_err);
-return;
+return -EINVAL;
 }
 }
 }
+
+return 0;
 }
 
 typedef struct InvalidateCacheCo {
 BlockDriverState *bs;
 Error **errp;
 bool done;
+int ret;
 } InvalidateCacheCo;
 
 static void coroutine_fn bdrv_invalidate_cache_co_entry(void *opaque)
 {
 InvalidateCacheCo *ico = opaque;
-bdrv_co_invalidate_cache(ico->bs, ico->errp);
+ico->ret = bdrv_co_invalidate_cache(ico->bs, ico->errp);
 ico->done = true;
 aio_wait_kick();
 }
 
-void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
+int bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
 {
 Coroutine *co;
 InvalidateCacheCo ico = {
@@ -5889,22 +5892,23 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error 
**errp)
 bdrv_coroutine_enter(bs, co);
 BDRV_POLL_WHILE(bs, !ico.done);
 }
+
+return ico.ret;
 }
 
 void bdrv_invalidate_cache_all(Error **errp)
 {
 BlockDriverState *bs;
-Error *local_err = NULL;
 BdrvNextIterator it;
 
 for (bs = bdrv_first(); bs; bs = bdrv_next()) {
 AioContext *aio_context = bdrv_get_aio_context(bs);
+int ret;
 
 aio_context_acquire(aio_context);
-bdrv_invalidate_cache(bs, _err);
+ret = 

[PULL v2 16/17] util/vfio-helpers: Collect IOVA reserved regions

2020-10-05 Thread Stefan Hajnoczi
From: Eric Auger 

The IOVA allocator currently ignores host reserved regions.
As a result some chosen IOVAs may collide with some of them,
resulting in VFIO MAP_DMA errors later on. This happens on ARM
where the MSI reserved window quickly is encountered:
[0x800, 0x810]. since 5.4 kernel, VFIO returns the usable
IOVA regions. So let's enumerate them in the prospect to avoid
them, later on.

Signed-off-by: Eric Auger 
Message-id: 20200929085550.30926-2-eric.au...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 util/vfio-helpers.c | 72 +++--
 1 file changed, 70 insertions(+), 2 deletions(-)

diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 9ac307e3d4..fe9ca9ce38 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -40,6 +40,11 @@ typedef struct {
 uint64_t iova;
 } IOVAMapping;
 
+struct IOVARange {
+uint64_t start;
+uint64_t end;
+};
+
 struct QEMUVFIOState {
 QemuMutex lock;
 
@@ -49,6 +54,8 @@ struct QEMUVFIOState {
 int device;
 RAMBlockNotifier ram_notifier;
 struct vfio_region_info config_region_info, bar_region_info[6];
+struct IOVARange *usable_iova_ranges;
+uint8_t nb_iova_ranges;
 
 /* These fields are protected by @lock */
 /* VFIO's IO virtual address space is managed by splitting into a few
@@ -236,6 +243,35 @@ static int qemu_vfio_pci_write_config(QEMUVFIOState *s, 
void *buf, int size, int
 return ret == size ? 0 : -errno;
 }
 
+static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
+{
+struct vfio_iommu_type1_info *info = (struct vfio_iommu_type1_info *)buf;
+struct vfio_info_cap_header *cap = (void *)buf + info->cap_offset;
+struct vfio_iommu_type1_info_cap_iova_range *cap_iova_range;
+int i;
+
+while (cap->id != VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
+if (!cap->next) {
+return;
+}
+cap = (struct vfio_info_cap_header *)(buf + cap->next);
+}
+
+cap_iova_range = (struct vfio_iommu_type1_info_cap_iova_range *)cap;
+
+s->nb_iova_ranges = cap_iova_range->nr_iovas;
+if (s->nb_iova_ranges > 1) {
+s->usable_iova_ranges =
+g_realloc(s->usable_iova_ranges,
+  s->nb_iova_ranges * sizeof(struct IOVARange));
+}
+
+for (i = 0; i < s->nb_iova_ranges; i++) {
+s->usable_iova_ranges[i].start = cap_iova_range->iova_ranges[i].start;
+s->usable_iova_ranges[i].end = cap_iova_range->iova_ranges[i].end;
+}
+}
+
 static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
   Error **errp)
 {
@@ -243,10 +279,13 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const 
char *device,
 int i;
 uint16_t pci_cmd;
 struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
-struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };
+struct vfio_iommu_type1_info *iommu_info = NULL;
+size_t iommu_info_size = sizeof(*iommu_info);
 struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
 char *group_file = NULL;
 
+s->usable_iova_ranges = NULL;
+
 /* Create a new container */
 s->container = open("/dev/vfio/vfio", O_RDWR);
 
@@ -310,13 +349,35 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const 
char *device,
 goto fail;
 }
 
+iommu_info = g_malloc0(iommu_info_size);
+iommu_info->argsz = iommu_info_size;
+
 /* Get additional IOMMU info */
-if (ioctl(s->container, VFIO_IOMMU_GET_INFO, _info)) {
+if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
 error_setg_errno(errp, errno, "Failed to get IOMMU info");
 ret = -errno;
 goto fail;
 }
 
+/*
+ * if the kernel does not report usable IOVA regions, choose
+ * the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX -1] region
+ */
+s->nb_iova_ranges = 1;
+s->usable_iova_ranges = g_new0(struct IOVARange, 1);
+s->usable_iova_ranges[0].start = QEMU_VFIO_IOVA_MIN;
+s->usable_iova_ranges[0].end = QEMU_VFIO_IOVA_MAX - 1;
+
+if (iommu_info->argsz > iommu_info_size) {
+iommu_info_size = iommu_info->argsz;
+iommu_info = g_realloc(iommu_info, iommu_info_size);
+if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
+ret = -errno;
+goto fail;
+}
+collect_usable_iova_ranges(s, iommu_info);
+}
+
 s->device = ioctl(s->group, VFIO_GROUP_GET_DEVICE_FD, device);
 
 if (s->device < 0) {
@@ -365,8 +426,13 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char 
*device,
 if (ret) {
 goto fail;
 }
+g_free(iommu_info);
 return 0;
 fail:
+g_free(s->usable_iova_ranges);
+s->usable_iova_ranges = NULL;
+s->nb_iova_ranges = 0;
+g_free(iommu_info);
 close(s->group);
 fail_container:
 close(s->container);
@@ -716,6 +782,8 @@ void qemu_vfio_close(QEMUVFIOState *s)
 

[PULL v2 09/17] block: declare some coroutine functions in block/coroutines.h

2020-10-05 Thread Stefan Hajnoczi
From: Vladimir Sementsov-Ogievskiy 

We are going to keep coroutine-wrappers code (structure-packing
parameters, BDRV_POLL wrapper functions) in separate auto-generated
files. So, we'll need a header with declaration of original _co_
functions, for those which are static now. As well, we'll need
declarations for wrapper functions. Do these declarations now, as a
preparation step.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Stefan Hajnoczi 
Message-Id: <20200924185414.28642-4-vsement...@virtuozzo.com>
---
 block/coroutines.h | 67 ++
 block.c|  8 +++---
 block/io.c | 34 +++
 3 files changed, 88 insertions(+), 21 deletions(-)
 create mode 100644 block/coroutines.h

diff --git a/block/coroutines.h b/block/coroutines.h
new file mode 100644
index 00..9ce1730a09
--- /dev/null
+++ b/block/coroutines.h
@@ -0,0 +1,67 @@
+/*
+ * Block layer I/O functions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef BLOCK_COROUTINES_INT_H
+#define BLOCK_COROUTINES_INT_H
+
+#include "block/block_int.h"
+
+int coroutine_fn bdrv_co_check(BlockDriverState *bs,
+   BdrvCheckResult *res, BdrvCheckMode fix);
+int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
+
+int coroutine_fn
+bdrv_co_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
+ bool is_write, BdrvRequestFlags flags);
+int
+bdrv_prwv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov,
+  bool is_write, BdrvRequestFlags flags);
+
+int coroutine_fn
+bdrv_co_common_block_status_above(BlockDriverState *bs,
+  BlockDriverState *base,
+  bool want_zero,
+  int64_t offset,
+  int64_t bytes,
+  int64_t *pnum,
+  int64_t *map,
+  BlockDriverState **file);
+int
+bdrv_common_block_status_above(BlockDriverState *bs,
+   BlockDriverState *base,
+   bool want_zero,
+   int64_t offset,
+   int64_t bytes,
+   int64_t *pnum,
+   int64_t *map,
+   BlockDriverState **file);
+
+int coroutine_fn
+bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+   bool is_read);
+int
+bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+bool is_read);
+
+#endif /* BLOCK_COROUTINES_INT_H */
diff --git a/block.c b/block.c
index 8de14de49c..324714351c 100644
--- a/block.c
+++ b/block.c
@@ -48,6 +48,7 @@
 #include "qemu/timer.h"
 #include "qemu/cutils.h"
 #include "qemu/id.h"
+#include "block/coroutines.h"
 
 #ifdef CONFIG_BSD
 #include 
@@ -4676,8 +4677,8 @@ static void bdrv_delete(BlockDriverState *bs)
  * free of errors) or -errno when an internal error occurred. The results of 
the
  * check are stored in res.
  */
-static int coroutine_fn bdrv_co_check(BlockDriverState *bs,
-  BdrvCheckResult *res, BdrvCheckMode fix)
+int coroutine_fn bdrv_co_check(BlockDriverState *bs,
+   BdrvCheckResult *res, BdrvCheckMode fix)
 {
 if (bs->drv == NULL) {
 return -ENOMEDIUM;
@@ -5781,8 +5782,7 @@ void bdrv_init_with_whitelist(void)
 bdrv_init();
 }
 
-static int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
- Error **errp)
+int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
 {
 BdrvChild *child, *parent;
 

[PULL v2 08/17] block/io: refactor coroutine wrappers

2020-10-05 Thread Stefan Hajnoczi
From: Vladimir Sementsov-Ogievskiy 

Most of our coroutine wrappers already follow this convention:

We have 'coroutine_fn bdrv_co_()' as
the core function, and a wrapper 'bdrv_()' which does parameter packing and calls bdrv_run_co().

The only outsiders are the bdrv_prwv_co and
bdrv_common_block_status_above wrappers. Let's refactor them to behave
as the others, it simplifies further conversion of coroutine wrappers.

This patch adds an indirection layer, but it will be compensated by
a further commit, which will drop bdrv_co_prwv together with the
is_write logic, to keep the read and write paths separate.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Stefan Hajnoczi 
Message-Id: <20200924185414.28642-3-vsement...@virtuozzo.com>
---
 block/io.c | 60 +-
 1 file changed, 32 insertions(+), 28 deletions(-)

diff --git a/block/io.c b/block/io.c
index 11df1889f1..b4f6ab0ab1 100644
--- a/block/io.c
+++ b/block/io.c
@@ -933,27 +933,31 @@ typedef struct RwCo {
 BdrvRequestFlags flags;
 } RwCo;
 
+static int coroutine_fn bdrv_co_prwv(BdrvChild *child, int64_t offset,
+ QEMUIOVector *qiov, bool is_write,
+ BdrvRequestFlags flags)
+{
+if (is_write) {
+return bdrv_co_pwritev(child, offset, qiov->size, qiov, flags);
+} else {
+return bdrv_co_preadv(child, offset, qiov->size, qiov, flags);
+}
+}
+
 static int coroutine_fn bdrv_rw_co_entry(void *opaque)
 {
 RwCo *rwco = opaque;
 
-if (!rwco->is_write) {
-return bdrv_co_preadv(rwco->child, rwco->offset,
-  rwco->qiov->size, rwco->qiov,
-  rwco->flags);
-} else {
-return bdrv_co_pwritev(rwco->child, rwco->offset,
-   rwco->qiov->size, rwco->qiov,
-   rwco->flags);
-}
+return bdrv_co_prwv(rwco->child, rwco->offset, rwco->qiov,
+rwco->is_write, rwco->flags);
 }
 
 /*
  * Process a vectored synchronous request using coroutines
  */
-static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
-QEMUIOVector *qiov, bool is_write,
-BdrvRequestFlags flags)
+static int bdrv_prwv(BdrvChild *child, int64_t offset,
+ QEMUIOVector *qiov, bool is_write,
+ BdrvRequestFlags flags)
 {
 RwCo rwco = {
 .child = child,
@@ -971,8 +975,7 @@ int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
 {
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes);
 
-return bdrv_prwv_co(child, offset, , true,
-BDRV_REQ_ZERO_WRITE | flags);
+return bdrv_prwv(child, offset, , true, BDRV_REQ_ZERO_WRITE | flags);
 }
 
 /*
@@ -1021,7 +1024,7 @@ int bdrv_preadv(BdrvChild *child, int64_t offset, 
QEMUIOVector *qiov)
 {
 int ret;
 
-ret = bdrv_prwv_co(child, offset, qiov, false, 0);
+ret = bdrv_prwv(child, offset, qiov, false, 0);
 if (ret < 0) {
 return ret;
 }
@@ -1045,7 +1048,7 @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, 
QEMUIOVector *qiov)
 {
 int ret;
 
-ret = bdrv_prwv_co(child, offset, qiov, true, 0);
+ret = bdrv_prwv(child, offset, qiov, true, 0);
 if (ret < 0) {
 return ret;
 }
@@ -2449,14 +2452,15 @@ early_out:
 return ret;
 }
 
-static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
-   BlockDriverState *base,
-   bool want_zero,
-   int64_t offset,
-   int64_t bytes,
-   int64_t *pnum,
-   int64_t *map,
-   BlockDriverState **file)
+static int coroutine_fn
+bdrv_co_common_block_status_above(BlockDriverState *bs,
+  BlockDriverState *base,
+  bool want_zero,
+  int64_t offset,
+  int64_t bytes,
+  int64_t *pnum,
+  int64_t *map,
+  BlockDriverState **file)
 {
 BlockDriverState *p;
 int ret = 0;
@@ -2494,10 +2498,10 @@ static int coroutine_fn 
bdrv_block_status_above_co_entry(void *opaque)
 {
 BdrvCoBlockStatusData *data = opaque;
 
-return bdrv_co_block_status_above(data->bs, data->base,
-  data->want_zero,
-  data->offset, data->bytes,
-  data->pnum, data->map, data->file);
+

[PULL v2 02/17] block/nvme: Map doorbells pages write-only

2020-10-05 Thread Stefan Hajnoczi
From: Philippe Mathieu-Daudé 

Per the datasheet sections 3.1.13/3.1.14:
  "The host should not read the doorbell registers."

As we don't need read access, map the doorbells with write-only
permission. We keep a reference to this mapped address in the
BDRVNVMeState structure.

Signed-off-by: Philippe Mathieu-Daudé 
Signed-off-by: Stefan Hajnoczi 
Message-Id: <20200922083821.578519-3-phi...@redhat.com>
---
 block/nvme.c | 29 +++--
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index 5a4dc6a722..3c834da8fe 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -31,7 +31,7 @@
 #define NVME_SQ_ENTRY_BYTES 64
 #define NVME_CQ_ENTRY_BYTES 16
 #define NVME_QUEUE_SIZE 128
-#define NVME_BAR_SIZE 8192
+#define NVME_DOORBELL_SIZE 4096
 
 /*
  * We have to leave one slot empty as that is the full queue case where
@@ -84,10 +84,6 @@ typedef struct {
 /* Memory mapped registers */
 typedef volatile struct {
 NvmeBar ctrl;
-struct {
-uint32_t sq_tail;
-uint32_t cq_head;
-} doorbells[];
 } NVMeRegs;
 
 #define INDEX_ADMIN 0
@@ -103,6 +99,11 @@ struct BDRVNVMeState {
 AioContext *aio_context;
 QEMUVFIOState *vfio;
 NVMeRegs *regs;
+/* Memory mapped registers */
+volatile struct {
+uint32_t sq_tail;
+uint32_t cq_head;
+} *doorbells;
 /* The submission/completion queue pairs.
  * [0]: admin queue.
  * [1..]: io queues.
@@ -247,14 +248,14 @@ static NVMeQueuePair 
*nvme_create_queue_pair(BDRVNVMeState *s,
 error_propagate(errp, local_err);
 goto fail;
 }
-q->sq.doorbell = >regs->doorbells[idx * s->doorbell_scale].sq_tail;
+q->sq.doorbell = >doorbells[idx * s->doorbell_scale].sq_tail;
 
 nvme_init_queue(s, >cq, size, NVME_CQ_ENTRY_BYTES, _err);
 if (local_err) {
 error_propagate(errp, local_err);
 goto fail;
 }
-q->cq.doorbell = >regs->doorbells[idx * s->doorbell_scale].cq_head;
+q->cq.doorbell = >doorbells[idx * s->doorbell_scale].cq_head;
 
 return q;
 fail:
@@ -712,13 +713,12 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 goto out;
 }
 
-s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, NVME_BAR_SIZE,
+s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, sizeof(NvmeBar),
 PROT_READ | PROT_WRITE, errp);
 if (!s->regs) {
 ret = -EINVAL;
 goto out;
 }
-
 /* Perform initialize sequence as described in NVMe spec "7.6.1
  * Initialization". */
 
@@ -748,6 +748,13 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 }
 }
 
+s->doorbells = qemu_vfio_pci_map_bar(s->vfio, 0, sizeof(NvmeBar),
+ NVME_DOORBELL_SIZE, PROT_WRITE, errp);
+if (!s->doorbells) {
+ret = -EINVAL;
+goto out;
+}
+
 /* Set up admin queue. */
 s->queues = g_new(NVMeQueuePair *, 1);
 s->queues[INDEX_ADMIN] = nvme_create_queue_pair(s, aio_context, 0,
@@ -873,7 +880,9 @@ static void nvme_close(BlockDriverState *bs)
>irq_notifier[MSIX_SHARED_IRQ_IDX],
false, NULL, NULL);
 event_notifier_cleanup(>irq_notifier[MSIX_SHARED_IRQ_IDX]);
-qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, NVME_BAR_SIZE);
+qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->doorbells,
+sizeof(NvmeBar), NVME_DOORBELL_SIZE);
+qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, sizeof(NvmeBar));
 qemu_vfio_close(s->vfio);
 
 g_free(s->device);
-- 
2.26.2



[PULL v2 01/17] util/vfio-helpers: Pass page protections to qemu_vfio_pci_map_bar()

2020-10-05 Thread Stefan Hajnoczi
From: Philippe Mathieu-Daudé 

Pages are currently mapped READ/WRITE. To be able to use different
protections, add a new argument to qemu_vfio_pci_map_bar().

Signed-off-by: Philippe Mathieu-Daudé 
Signed-off-by: Stefan Hajnoczi 
Message-Id: <20200922083821.578519-2-phi...@redhat.com>
---
 include/qemu/vfio-helpers.h | 2 +-
 block/nvme.c| 3 ++-
 util/vfio-helpers.c | 4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/qemu/vfio-helpers.h b/include/qemu/vfio-helpers.h
index 1f057c2b9e..4491c8e1a6 100644
--- a/include/qemu/vfio-helpers.h
+++ b/include/qemu/vfio-helpers.h
@@ -22,7 +22,7 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t 
size,
 int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s);
 void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host);
 void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
-uint64_t offset, uint64_t size,
+uint64_t offset, uint64_t size, int prot,
 Error **errp);
 void qemu_vfio_pci_unmap_bar(QEMUVFIOState *s, int index, void *bar,
  uint64_t offset, uint64_t size);
diff --git a/block/nvme.c b/block/nvme.c
index f4f27b6da7..5a4dc6a722 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -712,7 +712,8 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 goto out;
 }
 
-s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, NVME_BAR_SIZE, errp);
+s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, NVME_BAR_SIZE,
+PROT_READ | PROT_WRITE, errp);
 if (!s->regs) {
 ret = -EINVAL;
 goto out;
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 583bdfb36f..9ac307e3d4 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -146,13 +146,13 @@ static int qemu_vfio_pci_init_bar(QEMUVFIOState *s, int 
index, Error **errp)
  * Map a PCI bar area.
  */
 void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
-uint64_t offset, uint64_t size,
+uint64_t offset, uint64_t size, int prot,
 Error **errp)
 {
 void *p;
 assert_bar_index_valid(s, index);
 p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset),
- PROT_READ | PROT_WRITE, MAP_SHARED,
+ prot, MAP_SHARED,
  s->device, s->bar_region_info[index].offset + offset);
 if (p == MAP_FAILED) {
 error_setg_errno(errp, errno, "Failed to map BAR region");
-- 
2.26.2



[PULL v2 06/17] block/nvme: Replace magic value by SCALE_MS definition

2020-10-05 Thread Stefan Hajnoczi
From: Philippe Mathieu-Daudé 

Use self-explicit SCALE_MS definition instead of magic value
(missed in similar commit e4f310fe7f5).

Signed-off-by: Philippe Mathieu-Daudé 
Signed-off-by: Stefan Hajnoczi 
Message-Id: <20200922083821.578519-7-phi...@redhat.com>
---
 block/nvme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/nvme.c b/block/nvme.c
index 959569d262..b48f6f2588 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -772,7 +772,7 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
CC_EN_MASK);
 /* Wait for CSTS.RDY = 1. */
 now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-deadline = now + timeout_ms * 100;
+deadline = now + timeout_ms * SCALE_MS;
 while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
 if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
 error_setg(errp, "Timeout while waiting for device to start (%"
-- 
2.26.2



[PULL v2 04/17] block/nvme: Drop NVMeRegs structure, directly use NvmeBar

2020-10-05 Thread Stefan Hajnoczi
From: Philippe Mathieu-Daudé 

NVMeRegs only contains NvmeBar. Simplify the code by using NvmeBar
directly.

This triggers a checkpatch.pl error:

  ERROR: Use of volatile is usually wrong, please add a comment
  #30: FILE: block/nvme.c:691:
  +volatile NvmeBar *regs;

This is a false positive as in our case we are using I/O registers,
so the 'volatile' use is justified.

Signed-off-by: Philippe Mathieu-Daudé 
Signed-off-by: Stefan Hajnoczi 
Message-Id: <20200922083821.578519-5-phi...@redhat.com>
---
 block/nvme.c | 23 +--
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index e517c7539f..bd82990b66 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -81,11 +81,6 @@ typedef struct {
 QEMUBH  *completion_bh;
 } NVMeQueuePair;
 
-/* Memory mapped registers */
-typedef volatile struct {
-NvmeBar ctrl;
-} NVMeRegs;
-
 #define INDEX_ADMIN 0
 #define INDEX_IO(n) (1 + n)
 
@@ -694,7 +689,7 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 uint64_t timeout_ms;
 uint64_t deadline, now;
 Error *local_err = NULL;
-NVMeRegs *regs;
+volatile NvmeBar *regs = NULL;
 
 qemu_co_mutex_init(>dma_map_lock);
 qemu_co_queue_init(>dma_flush_queue);
@@ -722,7 +717,7 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 /* Perform initialize sequence as described in NVMe spec "7.6.1
  * Initialization". */
 
-cap = le64_to_cpu(regs->ctrl.cap);
+cap = le64_to_cpu(regs->cap);
 if (!(cap & (1ULL << 37))) {
 error_setg(errp, "Device doesn't support NVMe command set");
 ret = -EINVAL;
@@ -735,10 +730,10 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 3);
 
 /* Reset device to get a clean state. */
-regs->ctrl.cc = cpu_to_le32(le32_to_cpu(regs->ctrl.cc) & 0xFE);
+regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE);
 /* Wait for CSTS.RDY = 0. */
 deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
-while (le32_to_cpu(regs->ctrl.csts) & 0x1) {
+while (le32_to_cpu(regs->csts) & 0x1) {
 if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
 error_setg(errp, "Timeout while waiting for device to reset (%"
  PRId64 " ms)",
@@ -766,18 +761,18 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 }
 s->nr_queues = 1;
 QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
-regs->ctrl.aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
-regs->ctrl.asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
-regs->ctrl.acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
+regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
+regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
+regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
 
 /* After setting up all control registers we can enable device now. */
-regs->ctrl.cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
+regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
   (ctz32(NVME_SQ_ENTRY_BYTES) << 16) |
   0x1);
 /* Wait for CSTS.RDY = 1. */
 now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
 deadline = now + timeout_ms * 100;
-while (!(le32_to_cpu(regs->ctrl.csts) & 0x1)) {
+while (!(le32_to_cpu(regs->csts) & 0x1)) {
 if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
 error_setg(errp, "Timeout while waiting for device to start (%"
  PRId64 " ms)",
-- 
2.26.2



[PULL v2 00/17] Block patches

2020-10-05 Thread Stefan Hajnoczi
The following changes since commit 469e72ab7dbbd7ff4ee601e5ea7c29545d46593b:

  Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging 
(2020-10-02 16:19:42 +0100)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 9ab5741164b1727d22f69fe7001382baf0d56977:

  util/vfio-helpers: Rework the IOVA allocator to avoid IOVA reserved regions 
(2020-10-05 10:59:42 +0100)


Pull request

v2:
 * Removed clang-format call from scripts/block-coroutine-wrapper.py. This
   avoids the issue with clang version incompatibility. It could be added back
   in the future but the code is readable without reformatting and it also
   makes the build less dependent on the environment.



Eric Auger (2):
  util/vfio-helpers: Collect IOVA reserved regions
  util/vfio-helpers: Rework the IOVA allocator to avoid IOVA reserved
regions

Philippe Mathieu-Daudé (6):
  util/vfio-helpers: Pass page protections to qemu_vfio_pci_map_bar()
  block/nvme: Map doorbells pages write-only
  block/nvme: Reduce I/O registers scope
  block/nvme: Drop NVMeRegs structure, directly use NvmeBar
  block/nvme: Use register definitions from 'block/nvme.h'
  block/nvme: Replace magic value by SCALE_MS definition

Stefano Garzarella (1):
  docs: add 'io_uring' option to 'aio' param in qemu-options.hx

Vladimir Sementsov-Ogievskiy (8):
  block: return error-code from bdrv_invalidate_cache
  block/io: refactor coroutine wrappers
  block: declare some coroutine functions in block/coroutines.h
  scripts: add block-coroutine-wrapper.py
  block: generate coroutine-wrapper code
  block: drop bdrv_prwv
  block/io: refactor save/load vmstate
  include/block/block.h: drop non-ascii quotation mark

 block/block-gen.h  |  49 
 block/coroutines.h |  65 +
 include/block/block.h  |  36 ++-
 include/qemu/vfio-helpers.h|   2 +-
 block.c|  97 +--
 block/io.c | 339 -
 block/nvme.c   |  73 +++---
 tests/test-bdrv-drain.c|   2 +-
 util/vfio-helpers.c| 133 +-
 block/meson.build  |   8 +
 docs/devel/block-coroutine-wrapper.rst |  54 
 docs/devel/index.rst   |   1 +
 qemu-options.hx|  10 +-
 scripts/block-coroutine-wrapper.py | 167 
 14 files changed, 608 insertions(+), 428 deletions(-)
 create mode 100644 block/block-gen.h
 create mode 100644 block/coroutines.h
 create mode 100644 docs/devel/block-coroutine-wrapper.rst
 create mode 100644 scripts/block-coroutine-wrapper.py

-- 
2.26.2



[PULL v2 05/17] block/nvme: Use register definitions from 'block/nvme.h'

2020-10-05 Thread Stefan Hajnoczi
From: Philippe Mathieu-Daudé 

Use the NVMe register definitions from "block/nvme.h" which
ease a bit reviewing the code while matching the datasheet.

Signed-off-by: Philippe Mathieu-Daudé 
Signed-off-by: Stefan Hajnoczi 
Message-Id: <20200922083821.578519-6-phi...@redhat.com>
---
 block/nvme.c | 21 +++--
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index bd82990b66..959569d262 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -718,22 +718,22 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
  * Initialization". */
 
 cap = le64_to_cpu(regs->cap);
-if (!(cap & (1ULL << 37))) {
+if (!NVME_CAP_CSS(cap)) {
 error_setg(errp, "Device doesn't support NVMe command set");
 ret = -EINVAL;
 goto out;
 }
 
-s->page_size = MAX(4096, 1 << (12 + ((cap >> 48) & 0xF)));
-s->doorbell_scale = (4 << (((cap >> 32) & 0xF))) / sizeof(uint32_t);
+s->page_size = MAX(4096, 1 << NVME_CAP_MPSMIN(cap));
+s->doorbell_scale = (4 << NVME_CAP_DSTRD(cap)) / sizeof(uint32_t);
 bs->bl.opt_mem_alignment = s->page_size;
-timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 3);
+timeout_ms = MIN(500 * NVME_CAP_TO(cap), 3);
 
 /* Reset device to get a clean state. */
 regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE);
 /* Wait for CSTS.RDY = 0. */
 deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
-while (le32_to_cpu(regs->csts) & 0x1) {
+while (NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
 if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
 error_setg(errp, "Timeout while waiting for device to reset (%"
  PRId64 " ms)",
@@ -761,18 +761,19 @@ static int nvme_init(BlockDriverState *bs, const char 
*device, int namespace,
 }
 s->nr_queues = 1;
 QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
-regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
+regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << AQA_ACQS_SHIFT) |
+(NVME_QUEUE_SIZE << AQA_ASQS_SHIFT));
 regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
 regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
 
 /* After setting up all control registers we can enable device now. */
-regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
-  (ctz32(NVME_SQ_ENTRY_BYTES) << 16) |
-  0x1);
+regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) |
+   (ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) |
+   CC_EN_MASK);
 /* Wait for CSTS.RDY = 1. */
 now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
 deadline = now + timeout_ms * 100;
-while (!(le32_to_cpu(regs->csts) & 0x1)) {
+while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
 if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
 error_setg(errp, "Timeout while waiting for device to start (%"
  PRId64 " ms)",
-- 
2.26.2



Re: [PATCH v10 5/9] copy-on-read: limit guest COR activity to base in COR driver

2020-10-05 Thread Vladimir Sementsov-Ogievskiy

29.09.2020 15:38, Andrey Shinkevich wrote:

Limit the guest's COR operations by the base node in the backing chain
when the base node name is given. It will be useful for a block stream
job when the COR-filter is applied.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 38 --
  1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index e04092f..f53f7e0 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -121,8 +121,42 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
 size_t qiov_offset,
 int flags)
  {
-return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
-   flags | BDRV_REQ_COPY_ON_READ);
+int64_t n = 0;
+int64_t size = offset + bytes;
+int local_flags;
+int ret;
+BDRVStateCOR *state = bs->opaque;
+
+if (!state->base_bs) {
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
+}
+
+while (offset < size) {
+local_flags = flags;
+
+/* In case of failure, try to copy-on-read anyway */


But you add the flag only in case of success.. On any failure of furhter 
is*allocated calls we should set the flag.


+ret = bdrv_is_allocated(bs->file->bs, offset, bytes, );
+if (!ret) {
+ret = bdrv_is_allocated_above(bdrv_cow_bs(bs->file->bs),
+  state->base_bs, false, offset, n, 
);
+if (ret > 0) {
+local_flags |= BDRV_REQ_COPY_ON_READ;
+}
+}
+
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
+
+offset += n;
+qiov_offset += n;
+bytes -= n;
+}
+
+return 0;
  }
  
  




--
Best regards,
Vladimir



Re: [PATCH v10 4/9] copy-on-read: pass base node name to COR driver

2020-10-05 Thread Vladimir Sementsov-Ogievskiy

29.09.2020 15:38, Andrey Shinkevich wrote:

To limit the guest's COR operations by the base node in the backing


Better to drop "guest's " word. We don't to limit the guest in any


chain during stream job, pass the base node name to the copy-on-read
driver. The rest of the functionality will be implemented in the patch
that follows.



Oops. Seems we want bottom-node, not base, in according with stream job


Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 13 +
  1 file changed, 13 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 3c8231f..e04092f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -24,19 +24,23 @@
  #include "block/block_int.h"
  #include "qemu/module.h"
  #include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
  #include "qapi/qmp/qdict.h"
  #include "block/copy-on-read.h"
  
  
  typedef struct BDRVStateCOR {

  bool active;
+BlockDriverState *base_bs;
  } BDRVStateCOR;
  
  
  static int cor_open(BlockDriverState *bs, QDict *options, int flags,

  Error **errp)
  {
+BlockDriverState *base_bs = NULL;
  BDRVStateCOR *state = bs->opaque;
+const char *base_node = qdict_get_try_str(options, "base");
  
  bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,

 BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -52,7 +56,16 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
  ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
  bs->file->bs->supported_zero_flags);
  
+if (base_node) {

+qdict_del(options, "base");
+base_bs = bdrv_lookup_bs(NULL, base_node, errp);
+if (!base_bs) {
+error_setg(errp, QERR_BASE_NOT_FOUND, base_node);
+return -EINVAL;
+}
+}
  state->active = true;
+state->base_bs = base_bs;
  
  /*

   * We don't need to call bdrv_child_refresh_perms() now as the permissions




--
Best regards,
Vladimir



Re: [PATCH] ide: clean up ahci_populate_sglist

2020-10-05 Thread Philippe Mathieu-Daudé
On 10/5/20 2:55 PM, Paolo Bonzini wrote:
> Alex reported an uninitialized variable warning in ahci_populate_sglist.
> Even though the warning is bogus and happens only because of -Og, the
> code in the function leaves something to be desired; the condition that
> triggers the warning is easily shown to be entirely redundant.
> 
> In particular, the loop's "if" condition can be rewritten from
> "offset < sum + tbl_entry_size" to "offset - sum < tbl_entry_size";
> this is safe since the LHS cannot underflow.  Because off_pos is
> exactly "offset - sum" it is clear that it can never be less than
> zero or greater than tbl_entry_size.  We can therefore keep the off_idx
> check only and, for documentation purposes, reduce off_pos to an unsigned
> 32-bit integer.
> 
> The tracepoint also is not particularly useful at this point, since
> we know that (if it ever triggers) off_idx will be -1 and off_pos
> uninitialized.  Instead, include the requested offset and the total PRDT
> length, which will be smaller than the offset.
> 

Reported-by: Alex Bennée 
so we know which 'Alex', and:
Reviewed-by: Philippe Mathieu-Daudé 

> Signed-off-by: Paolo Bonzini 
> ---
>  hw/ide/ahci.c   | 12 +---
>  hw/ide/trace-events |  2 +-
>  2 files changed, 6 insertions(+), 8 deletions(-)
> 
> diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
> index 680304a24c..997b67a6fc 100644
> --- a/hw/ide/ahci.c
> +++ b/hw/ide/ahci.c
> @@ -924,8 +924,7 @@ static int ahci_populate_sglist(AHCIDevice *ad, 
> QEMUSGList *sglist,
>  int r = 0;
>  uint64_t sum = 0;
>  int off_idx = -1;
> -int64_t off_pos = -1;
> -int tbl_entry_size;
> +uint32_t off_pos = 0;
>  IDEBus *bus = >port;
>  BusState *qbus = BUS(bus);
>  
> @@ -952,19 +951,18 @@ static int ahci_populate_sglist(AHCIDevice *ad, 
> QEMUSGList *sglist,
>  /* Get entries in the PRDT, init a qemu sglist accordingly */
>  if (prdtl > 0) {
>  AHCI_SG *tbl = (AHCI_SG *)prdt;
> -sum = 0;
>  for (i = 0; i < prdtl; i++) {
> -tbl_entry_size = prdt_tbl_entry_size([i]);
> -if (offset < (sum + tbl_entry_size)) {
> +uint32_t tbl_entry_size = prdt_tbl_entry_size([i]);
> +if (offset - sum < tbl_entry_size) {
>  off_idx = i;
>  off_pos = offset - sum;
>  break;
>  }
>  sum += tbl_entry_size;
>  }
> -if ((off_idx == -1) || (off_pos < 0) || (off_pos > tbl_entry_size)) {
> +if (off_idx == -1) {
>  trace_ahci_populate_sglist_bad_offset(ad->hba, ad->port_no,
> -  off_idx, off_pos);
> +  sum, offset);
>  r = -1;
>  goto out;
>  }
> diff --git a/hw/ide/trace-events b/hw/ide/trace-events
> index 6e357685f9..81706efe80 100644
> --- a/hw/ide/trace-events
> +++ b/hw/ide/trace-events
> @@ -88,7 +88,7 @@ ahci_populate_sglist(void *s, int port) "ahci(%p)[%d]"
>  ahci_populate_sglist_no_prdtl(void *s, int port, uint16_t opts) 
> "ahci(%p)[%d]: no sg list given by guest: 0x%04x"
>  ahci_populate_sglist_no_map(void *s, int port) "ahci(%p)[%d]: DMA mapping 
> failed"
>  ahci_populate_sglist_short_map(void *s, int port) "ahci(%p)[%d]: mapped less 
> than expected"
> -ahci_populate_sglist_bad_offset(void *s, int port, int off_idx, int64_t 
> off_pos) "ahci(%p)[%d]: Incorrect offset! off_idx: %d, off_pos: %"PRId64
> +ahci_populate_sglist_bad_offset(void *s, int port, uint64_t sum, uint64_t 
> offset) "ahci(%p)[%d]: Incorrect offset! total PRDT length %"PRIu64", offset: 
> %"PRIu64
>  ncq_finish(void *s, int port, uint8_t tag) "ahci(%p)[%d][tag:%d]: NCQ 
> transfer finished"
>  execute_ncq_command_read(void *s, int port, uint8_t tag, int count, int64_t 
> lba) "ahci(%p)[%d][tag:%d]: NCQ reading %d sectors from LBA %"PRId64
>  execute_ncq_command_unsup(void *s, int port, uint8_t tag, uint8_t cmd) 
> "ahci(%p)[%d][tag:%d]: error: unsupported NCQ command (0x%02x) received"
> 




Re: [PATCH v10 2/9] copy-on-read: add filter append/drop functions

2020-10-05 Thread Vladimir Sementsov-Ogievskiy

29.09.2020 15:38, Andrey Shinkevich wrote:

Provide API for the COR-filter insertion/removal.
Also, drop the filter child permissions for an inactive state when the
filter node is being removed.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 84 
  block/copy-on-read.h | 35 ++
  2 files changed, 119 insertions(+)
  create mode 100644 block/copy-on-read.h

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..3c8231f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -23,11 +23,21 @@
  #include "qemu/osdep.h"
  #include "block/block_int.h"
  #include "qemu/module.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "block/copy-on-read.h"
+
+
+typedef struct BDRVStateCOR {
+bool active;
+} BDRVStateCOR;
  
  
  static int cor_open(BlockDriverState *bs, QDict *options, int flags,

  Error **errp)
  {
+BDRVStateCOR *state = bs->opaque;
+
  bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
 BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
 false, errp);
@@ -42,6 +52,13 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
  ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
  bs->file->bs->supported_zero_flags);
  
+state->active = true;

+
+/*
+ * We don't need to call bdrv_child_refresh_perms() now as the permissions
+ * will be updated later when the filter node gets its parent.
+ */
+
  return 0;
  }
  
@@ -57,6 +74,17 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild *c,

 uint64_t perm, uint64_t shared,
 uint64_t *nperm, uint64_t *nshared)
  {
+BDRVStateCOR *s = bs->opaque;
+
+if (!s->active) {
+/*
+ * While the filter is being removed
+ */
+*nperm = 0;
+*nshared = BLK_PERM_ALL;
+return;
+}
+
  *nperm = perm & PERM_PASSTHROUGH;
  *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
  
@@ -135,6 +163,7 @@ static void cor_lock_medium(BlockDriverState *bs, bool locked)
  
  static BlockDriver bdrv_copy_on_read = {

  .format_name= "copy-on-read",
+.instance_size  = sizeof(BDRVStateCOR),
  
  .bdrv_open  = cor_open,

  .bdrv_child_perm= cor_child_perm,
@@ -159,4 +188,59 @@ static void bdrv_copy_on_read_init(void)
  bdrv_register(_copy_on_read);
  }
  
+

+BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
+ QDict *node_options,
+ int flags, Error **errp)



Ok, now function can add ~any filter, not only COR.. But it's a pair for 
bdrv_cor_filter_drop(), and with "active" hack we don't want make the functions 
generic I think. So it's OK for now to keep function here and named _cor_.


+{
+BlockDriverState *cor_filter_bs;
+Error *local_err = NULL;
+
+cor_filter_bs = bdrv_open(NULL, NULL, node_options, flags, errp);
+if (cor_filter_bs == NULL) {
+error_prepend(errp, "Could not create COR-filter node: ");
+return NULL;
+}


You've dropped setting ->implicit field if filter_node_name not specified. 
Probably caller now can do it.. I don't really care about implicit case, so it's 
OK for me if it works with iotests.

So,

Reviewed-by: Vladimir Sementsov-Ogievskiy 


--
Best regards,
Vladimir



Re: [PULL 00/17] Block patches

2020-10-05 Thread Stefan Hajnoczi
On Thu, Oct 01, 2020 at 04:12:12PM +0100, Peter Maydell wrote:
> On Thu, 1 Oct 2020 at 16:03, Stefan Hajnoczi  wrote:
> >
> > On Thu, Oct 01, 2020 at 12:23:00PM +0100, Peter Maydell wrote:
> > > This produces this error message on ppc64be Linux:
> > >
> > > make: Entering directory `/home/pm215/qemu/build/all'
> > > make[1]: Entering directory `/home/pm215/qemu/slirp'
> > > make[1]: Nothing to be done for `all'.
> > > make[1]: Leaving directory `/home/pm215/qemu/slirp'
> > > Generating qemu-version.h with a meson_exe.py custom command
> > > Generating qemu-options.def with a meson_exe.py custom command
> > > Generating block-gen.c with a custom command
> > > YAML:1:83: error: unknown enumerated scalar
> > > {"IndentWidth": 4, "BraceWrapping": {"AfterFunction": true},
> > > "BreakBeforeBraces": "Custom", "SortIncludes": false,
> > > "MaxEmptyLinesToKeep": 2}
> > >
> > >^~~~
> > > Error parsing -style: Invalid argument, using LLVM style
> > > YAML:1:83: error: unknown enumerated scalar
> > > {"IndentWidth": 4, "BraceWrapping": {"AfterFunction": true},
> > > "BreakBeforeBraces": "Custom", "SortIncludes": false,
> > > "MaxEmptyLinesToKeep": 2}
> > >
> > >^~~~
> > > Error parsing -style: Invalid argument, using LLVM style
> > > Compiling C object libqemuutil.a.p/util_qemu-error.c.o
> > > Compiling C object libqemuutil.a.p/util_qemu-sockets.c.o
> > > Compiling C object libqemuutil.a.p/util_aio-posix.c.o
> > > Compiling C object libqemuutil.a.p/util_osdep.c.o
> > >
> > > The error does not cause the build to fail, which seems
> > > like it's also a bug...
> > >
> > > (My guess is this is due to some script implicitly wanting
> > > a newer version of something or other than the PPC box
> > > happens to have installed, rather than being an endianness
> > > issue.)
> >
> > Please rerun with make -j1 V=1 so the full command is printed. I'm not
> > sure what is emitting these errors.
> 
> Build tree already overwritten to handle a different pullreq,
> I'm afraid. I can come back and retry later...

No problem. Thanks for pointing out the issue, Vladimir and Peter.

I will send a v2.

Stefan


signature.asc
Description: PGP signature


Re: [RFC PATCH 1/3] qom: Always register FW_CFG_DATA_GENERATOR_INTERFACE

2020-10-05 Thread Daniel P . Berrangé
On Mon, Oct 05, 2020 at 12:54:40PM +0200, Philippe Mathieu-Daudé wrote:
> While the FW_CFG_DATA_GENERATOR_INTERFACE is only consumed
> by a device only available using system-mode (fw_cfg), it is
> implemented by a crypto component (tls-cipher-suites) which
> is always available when crypto is used.
> 
> Commit 69699f3055 introduced the following error in the
> qemu-storage-daemon binary:
> 
>   $ echo -e \
> '{"execute": "qmp_capabilities"}\r\n{"execute": 
> "qom-list-types"}\r\n{"execute": "quit"}\r\n' \
> | storage-daemon/qemu-storage-daemon --chardev stdio,id=qmp0  --monitor 
> qmp0
>   {"QMP": {"version": {"qemu": {"micro": 50, "minor": 1, "major": 5}, 
> "package": ""}, "capabilities": ["oob"]}}
>   {"return": {}}
>   missing interface 'fw_cfg-data-generator' for object 'tls-creds'
>   Aborted (core dumped)
> 
> Since QOM dependencies are resolved at runtime, this issue
> could not be triggered at linktime, and we don't have test
> running the qemu-storage-daemon binary.
> 
> Fix by always registering the QOM interface.
> 
> Reported-by: Kevin Wolf 
> Fixes: 69699f3055 ("crypto/tls-cipher-suites: Produce fw_cfg consumable blob")
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
> I first used:
> 
> +if config_host.has_key('CONFIG_GNUTLS') or have_system
> +  qom_ss.add(files('fw_cfg_interface.c'))
> +endif
> 
> but then realized anything could implement a QOM interface,
> so better keep this generic.
> ---
>  hw/nvram/fw_cfg.c  |  7 ---
>  qom/fw_cfg_interface.c | 15 +++

I feel this should be left in hw/nvram, but still added to qom_ss.

The code location should reflect the functional area and maintainership,
so we shouldn't move code just to satisfy linkage problems.

>  MAINTAINERS|  1 +
>  qom/meson.build|  5 +
>  4 files changed, 21 insertions(+), 7 deletions(-)
>  create mode 100644 qom/fw_cfg_interface.c
> 
> diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
> index 0e95d057fd..08539a1aab 100644
> --- a/hw/nvram/fw_cfg.c
> +++ b/hw/nvram/fw_cfg.c
> @@ -1360,18 +1360,11 @@ static const TypeInfo fw_cfg_mem_info = {
>  .class_init= fw_cfg_mem_class_init,
>  };
>  
> -static const TypeInfo fw_cfg_data_generator_interface_info = {
> -.parent = TYPE_INTERFACE,
> -.name = TYPE_FW_CFG_DATA_GENERATOR_INTERFACE,
> -.class_size = sizeof(FWCfgDataGeneratorClass),
> -};
> -
>  static void fw_cfg_register_types(void)
>  {
>  type_register_static(_cfg_info);
>  type_register_static(_cfg_io_info);
>  type_register_static(_cfg_mem_info);
> -type_register_static(_cfg_data_generator_interface_info);
>  }
>  
>  type_init(fw_cfg_register_types)
> diff --git a/qom/fw_cfg_interface.c b/qom/fw_cfg_interface.c
> new file mode 100644
> index 00..2b19502ffe
> --- /dev/null
> +++ b/qom/fw_cfg_interface.c
> @@ -0,0 +1,15 @@
> +#include "qemu/osdep.h"
> +#include "hw/nvram/fw_cfg.h"
> +
> +static const TypeInfo fw_cfg_data_generator_interface_info = {
> +.parent = TYPE_INTERFACE,
> +.name = TYPE_FW_CFG_DATA_GENERATOR_INTERFACE,
> +.class_size = sizeof(FWCfgDataGeneratorClass),
> +};
> +
> +static void fw_cfg_register_types(void)
> +{
> +type_register_static(_cfg_data_generator_interface_info);
> +}
> +
> +type_init(fw_cfg_register_types)
> diff --git a/MAINTAINERS b/MAINTAINERS
> index b76fb31861..9c89d54b41 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -2055,6 +2055,7 @@ R: Gerd Hoffmann 
>  S: Supported
>  F: docs/specs/fw_cfg.txt
>  F: hw/nvram/fw_cfg.c
> +F: qom/fw_cfg_interface.c
>  F: stubs/fw_cfg.c
>  F: include/hw/nvram/fw_cfg.h
>  F: include/standard-headers/linux/qemu_fw_cfg.h
> diff --git a/qom/meson.build b/qom/meson.build
> index a1cd03c82c..7335f8c8a2 100644
> --- a/qom/meson.build
> +++ b/qom/meson.build
> @@ -7,6 +7,11 @@ qom_ss.add(files(
>'qom-qobject.c',
>  ))
>  
> +# interfaces any object might implement
> +qom_ss.add(files(
> +  'fw_cfg_interface.c',
> +))
> +
>  qmp_ss.add(files('qom-qmp-cmds.c'))
>  softmmu_ss.add(files('qom-hmp-cmds.c'))
>  
> -- 
> 2.26.2
> 

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [RFC PATCH 3/3] tests: Add a trivial qemu-storage-daemon test

2020-10-05 Thread Paolo Bonzini
On 05/10/20 12:54, Philippe Mathieu-Daudé wrote:
> This test fails on top of commit 69699f3055
> ("crypto/tls-cipher-suites: Produce fw_cfg consumable blob")
> because the TYPE_FW_CFG_DATA_GENERATOR_INTERFACE registered
> in hw/nvram/fw_cfg.c is not linked into qemu-storage-daemon:
> 
>   $ make check-block
>   Generating qemu-version.h with a meson_exe.py custom command
> qemu-storage-daemon
>   tests/qemu-storage-daemon.sh: line 10: 2089929 Aborted 
> (core dumped)
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  tests/Makefile.include   |  3 +++
>  tests/qemu-storage-daemon.sh | 10 ++
>  2 files changed, 13 insertions(+)
>  create mode 100755 tests/qemu-storage-daemon.sh
> 
> diff --git a/tests/Makefile.include b/tests/Makefile.include
> index d25560..be12581c77 100644
> --- a/tests/Makefile.include
> +++ b/tests/Makefile.include
> @@ -142,7 +142,10 @@ endif
>  check: check-block
>  check-block: $(SRC_PATH)/tests/check-block.sh qemu-img$(EXESUF) \
>   qemu-io$(EXESUF) qemu-nbd$(EXESUF) $(QEMU_IOTESTS_HELPERS-y) \
> + storage-daemon/qemu-storage-daemon \
>   $(patsubst %-softmmu,qemu-system-%,$(filter 
> %-softmmu,$(TARGET_DIRS)))
> + $(call quiet-command, \
> + $(SRC_PATH)/tests/qemu-storage-daemon.sh, 
> "qemu-storage-daemon")
>   @$<
>  endif
>  
> diff --git a/tests/qemu-storage-daemon.sh b/tests/qemu-storage-daemon.sh
> new file mode 100755
> index 00..9fd4c73400
> --- /dev/null
> +++ b/tests/qemu-storage-daemon.sh
> @@ -0,0 +1,10 @@
> +#!/bin/sh
> +
> +# Test all QOM dependencies are resolved
> +storage-daemon/qemu-storage-daemon \
> +  --chardev stdio,id=qmp0  --monitor qmp0 \
> +  > /dev/null << 'EOF'
> +{"execute": "qmp_capabilities"}
> +{"execute": "qom-list-types"}
> +{"execute": "quit"}
> +EOF

I think you should either do this as a qemu-iotests testcase, or use
libqtest.

Paolo




[PATCH] ide: clean up ahci_populate_sglist

2020-10-05 Thread Paolo Bonzini
Alex reported an uninitialized variable warning in ahci_populate_sglist.
Even though the warning is bogus and happens only because of -Og, the
code in the function leaves something to be desired; the condition that
triggers the warning is easily shown to be entirely redundant.

In particular, the loop's "if" condition can be rewritten from
"offset < sum + tbl_entry_size" to "offset - sum < tbl_entry_size";
this is safe since the LHS cannot underflow.  Because off_pos is
exactly "offset - sum" it is clear that it can never be less than
zero or greater than tbl_entry_size.  We can therefore keep the off_idx
check only and, for documentation purposes, reduce off_pos to an unsigned
32-bit integer.

The tracepoint also is not particularly useful at this point, since
we know that (if it ever triggers) off_idx will be -1 and off_pos
uninitialized.  Instead, include the requested offset and the total PRDT
length, which will be smaller than the offset.

Signed-off-by: Paolo Bonzini 
---
 hw/ide/ahci.c   | 12 +---
 hw/ide/trace-events |  2 +-
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 680304a24c..997b67a6fc 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -924,8 +924,7 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList 
*sglist,
 int r = 0;
 uint64_t sum = 0;
 int off_idx = -1;
-int64_t off_pos = -1;
-int tbl_entry_size;
+uint32_t off_pos = 0;
 IDEBus *bus = >port;
 BusState *qbus = BUS(bus);
 
@@ -952,19 +951,18 @@ static int ahci_populate_sglist(AHCIDevice *ad, 
QEMUSGList *sglist,
 /* Get entries in the PRDT, init a qemu sglist accordingly */
 if (prdtl > 0) {
 AHCI_SG *tbl = (AHCI_SG *)prdt;
-sum = 0;
 for (i = 0; i < prdtl; i++) {
-tbl_entry_size = prdt_tbl_entry_size([i]);
-if (offset < (sum + tbl_entry_size)) {
+uint32_t tbl_entry_size = prdt_tbl_entry_size([i]);
+if (offset - sum < tbl_entry_size) {
 off_idx = i;
 off_pos = offset - sum;
 break;
 }
 sum += tbl_entry_size;
 }
-if ((off_idx == -1) || (off_pos < 0) || (off_pos > tbl_entry_size)) {
+if (off_idx == -1) {
 trace_ahci_populate_sglist_bad_offset(ad->hba, ad->port_no,
-  off_idx, off_pos);
+  sum, offset);
 r = -1;
 goto out;
 }
diff --git a/hw/ide/trace-events b/hw/ide/trace-events
index 6e357685f9..81706efe80 100644
--- a/hw/ide/trace-events
+++ b/hw/ide/trace-events
@@ -88,7 +88,7 @@ ahci_populate_sglist(void *s, int port) "ahci(%p)[%d]"
 ahci_populate_sglist_no_prdtl(void *s, int port, uint16_t opts) "ahci(%p)[%d]: 
no sg list given by guest: 0x%04x"
 ahci_populate_sglist_no_map(void *s, int port) "ahci(%p)[%d]: DMA mapping 
failed"
 ahci_populate_sglist_short_map(void *s, int port) "ahci(%p)[%d]: mapped less 
than expected"
-ahci_populate_sglist_bad_offset(void *s, int port, int off_idx, int64_t 
off_pos) "ahci(%p)[%d]: Incorrect offset! off_idx: %d, off_pos: %"PRId64
+ahci_populate_sglist_bad_offset(void *s, int port, uint64_t sum, uint64_t 
offset) "ahci(%p)[%d]: Incorrect offset! total PRDT length %"PRIu64", offset: 
%"PRIu64
 ncq_finish(void *s, int port, uint8_t tag) "ahci(%p)[%d][tag:%d]: NCQ transfer 
finished"
 execute_ncq_command_read(void *s, int port, uint8_t tag, int count, int64_t 
lba) "ahci(%p)[%d][tag:%d]: NCQ reading %d sectors from LBA %"PRId64
 execute_ncq_command_unsup(void *s, int port, uint8_t tag, uint8_t cmd) 
"ahci(%p)[%d][tag:%d]: error: unsupported NCQ command (0x%02x) received"
-- 
2.26.2




[PATCH] migration: block-dirty-bitmap: rewrite dirty_bitmap_load_header

2020-10-05 Thread Paolo Bonzini
Alex reported an uninitialized variable warning in dirty_bitmap_load_header,
where the compiler cannot understand that the !s->cancelled check must be
true for the following one to pass.

Even though the issue happened only because of -Og, the function is very
convoluted.  Just rewrite it to first look up s->bs and then s->bitmap,
and to avoid long sequences of "else if"s.

Signed-off-by: Paolo Bonzini 
---
 migration/block-dirty-bitmap.c | 138 -
 1 file changed, 68 insertions(+), 70 deletions(-)

diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 5bef793ac0..24d749e35e 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -1010,102 +1010,100 @@ static int dirty_bitmap_load_bits(QEMUFile *f, 
DBMLoadState *s)
 static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s,
 GHashTable *alias_map)
 {
-GHashTable *bitmap_alias_map = NULL;
-Error *local_err = NULL;
-bool nothing;
 s->flags = qemu_get_bitmap_flags(f);
 trace_dirty_bitmap_load_header(s->flags);
 
-nothing = s->flags == (s->flags & DIRTY_BITMAP_MIG_FLAG_EOS);
-
+/* Read the whole header early so we can easily cancel in case of errors.  
*/
 if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
 if (!qemu_get_counted_string(f, s->node_alias)) {
 error_report("Unable to read node alias string");
 return -EINVAL;
 }
-
-if (!s->cancelled) {
-if (alias_map) {
-const AliasMapInnerNode *amin;
-
-amin = g_hash_table_lookup(alias_map, s->node_alias);
-if (!amin) {
-error_setg(_err, "Error: Unknown node alias '%s'",
-   s->node_alias);
-s->bs = NULL;
-} else {
-bitmap_alias_map = amin->subtree;
-s->bs = bdrv_lookup_bs(NULL, amin->string, _err);
-}
-} else {
-s->bs = bdrv_lookup_bs(s->node_alias, s->node_alias,
-   _err);
-}
-if (!s->bs) {
-error_report_err(local_err);
-cancel_incoming_locked(s);
-}
+}
+if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
+if (!qemu_get_counted_string(f, s->bitmap_alias)) {
+error_report("Unable to read bitmap alias string");
+return -EINVAL;
 }
-} else if (s->bs) {
+}
+
+if ((s->flags & ~DIRTY_BITMAP_MIG_FLAG_EOS) == 0 || s->cancelled) {
+return 0;
+}
+
+if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
+Error *local_err = NULL;
 if (alias_map) {
 const AliasMapInnerNode *amin;
 
-/* Must be present in the map, or s->bs would not be set */
 amin = g_hash_table_lookup(alias_map, s->node_alias);
-assert(amin != NULL);
-
-bitmap_alias_map = amin->subtree;
+if (!amin) {
+error_report("Error: Unknown node alias '%s'", s->node_alias);
+s->bs = NULL;
+goto cancel;
+}
+s->bs = bdrv_lookup_bs(NULL, amin->string, _err);
+} else {
+s->bs = bdrv_lookup_bs(s->node_alias, s->node_alias, _err);
 }
-} else if (!nothing && !s->cancelled) {
+if (!s->bs) {
+error_report_err(local_err);
+goto cancel;
+}
+s->bitmap_name[0] = 0;
+}
+if (!s->bs) {
 error_report("Error: block device name is not set");
-cancel_incoming_locked(s);
+goto cancel;
 }
 
-assert(nothing || s->cancelled || !!alias_map == !!bitmap_alias_map);
-
 if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
 const char *bitmap_name;
 
-if (!qemu_get_counted_string(f, s->bitmap_alias)) {
-error_report("Unable to read bitmap alias string");
-return -EINVAL;
-}
+if (alias_map) {
+const AliasMapInnerNode *amin;
+GHashTable *bitmap_alias_map;
 
-if (!s->cancelled) {
-if (bitmap_alias_map) {
-bitmap_name = g_hash_table_lookup(bitmap_alias_map,
-  s->bitmap_alias);
-if (!bitmap_name) {
-error_report("Error: Unknown bitmap alias '%s' on node "
- "'%s' (alias '%s')", s->bitmap_alias,
- s->bs->node_name, s->node_alias);
-cancel_incoming_locked(s);
-}
-} else {
-bitmap_name = s->bitmap_alias;
-}
-}
+amin = g_hash_table_lookup(alias_map, s->node_alias);
+bitmap_alias_map = amin->subtree;
 
-if (!s->cancelled) {
-

Re: [PATCH v5 10/10] migration: introduce snapshot-{save, load, delete} QMP commands

2020-10-05 Thread Markus Armbruster
Daniel P. Berrangé  writes:

> On Mon, Oct 05, 2020 at 09:26:54AM +0200, Markus Armbruster wrote:
>> Eric Blake  writes:
>> 
>> > On 10/2/20 11:27 AM, Daniel P. Berrangé wrote:
>> >
>> > Do we have a query- command handy to easily learn which snapshot names
>> > are even available to attempt deletion on?  If not, that's worth a
>> > separate patch.
>> 
>> Oh, I missed that one.  It's the QMP equivalent to "info snapshots", and
>> it is required to finish the job.  Since we're at v5, I'd be okay with a
>> follow-up patch, as long as it is done for 5.2.
>
> "query-named-block-nodes" returns BlockDeviceInfo struct, which
> contains ImageInfo which contains an array of SnapshotInfo. So
> we don't need any new query command.

My Acked-by stands without a new query then.




Re: [PATCH v2 1/4] keyval: Parse help options

2020-10-05 Thread Markus Armbruster
Markus Armbruster  writes:

[...]
> Making help support opt-in complicates things.  Is there a genuine use
> for not supporting help?  Or is just to keep the users that don't
> support help yet (but should) working without change?  Mind, I'm not
> asking you to make them work, I'm only asking whether you can think of a
> genuine case where help should not work.
>
> What are the existing users that don't support help?  Let's see... many
> in tests/test-keyval.c (ignore), and qobject_input_visitor_new_str().
> That one's used for qemu-system-FOO -audiodev, -display, -blockdev, and
> for qemu-storage-daemon --blockdev, --export, --monitor, --nbd-server.
>
> Attempting to get help for them fails like this:
>
> $ bld-x86/storage-daemon/qemu-storage-daemon --blockdev help
> qemu-storage-daemon: Invalid parameter 'help'
> $ bld-x86/storage-daemon/qemu-storage-daemon --blockdev file,help
> qemu-storage-daemon: Expected '=' after parameter 'help'
>
> I believe making them fail like
>
> qemu-storage-daemon: no help available
>
> would actually be an improvement.

Potential issue: if an option's implied key may have the value "help",
then option argument "help" can be either parsed as "give me help", or
as "implied-key=help".

None of the existing options have this issue:

* audiodev: union Audiodev, implied key "driver" is enum AudiodevDriver,
   "help" is not among its values

* display: union DisplayOptions, implied key "type" is enum
   DisplayType, "help" is not among its values

* blockdev: union BlockdevOptions, implied key "driver is enum
   BlockdevDriver, "help" is not among its values

* export: union BlockExport, implied key "type" is enum BlockExportType,
  "help" is not among its values

* monitor: struct MonitorOptions, implied key "mode"is enum MonitorMode,
  "help" is not among its values

* nbd-server: struct NbdServerOptions, no implied key.

We're good.

What's the risk of not staying good?

The implied keys above are all QAPI enums.

The only existing QAPI enum with a value 'help' is QKeyCode.

The only existing C enum with a name that ends in _HELP is the one
defining Mac keycodes in hw/input/adb-keys.h.

For completeness, I double-checked non of the existing occurences of
string "help" are used as values of option parameters.

We'll almost certainly stay good.

What if we manage to mess this up against all odds?

In my opinion, consistency in getting help is more important than
consistency within a badly designed option: option argument "help"
should give you help even when that means you can't omit the implied key
when its value is "help".

[...]




Re: [PATCH v5 09/14] hw/block/nvme: Support Zoned Namespace Command Set

2020-10-05 Thread Niklas Cassel
On Sun, Oct 04, 2020 at 11:57:07PM +, Dmitry Fomichev wrote:
> On Wed, 2020-09-30 at 14:50 +, Niklas Cassel wrote:
> > On Mon, Sep 28, 2020 at 11:35:23AM +0900, Dmitry Fomichev wrote:
> > > The emulation code has been changed to advertise NVM Command Set when
> > > "zoned" device property is not set (default) and Zoned Namespace
> > > Command Set otherwise.
> > > 
> > > Handlers for three new NVMe commands introduced in Zoned Namespace
> > > Command Set specification are added, namely for Zone Management
> > > Receive, Zone Management Send and Zone Append.
> > > 
> > > Device initialization code has been extended to create a proper
> > > configuration for zoned operation using device properties.
> > > 
> > > Read/Write command handler is modified to only allow writes at the
> > > write pointer if the namespace is zoned. For Zone Append command,
> > > writes implicitly happen at the write pointer and the starting write
> > > pointer value is returned as the result of the command. Write Zeroes
> > > handler is modified to add zoned checks that are identical to those
> > > done as a part of Write flow.
> > > 
> > > The code to support for Zone Descriptor Extensions is not included in
> > > this commit and ZDES 0 is always reported. A later commit in this
> > > series will add ZDE support.
> > > 
> > > This commit doesn't yet include checks for active and open zone
> > > limits. It is assumed that there are no limits on either active or
> > > open zones.
> > > 
> > > Signed-off-by: Niklas Cassel 
> > > Signed-off-by: Hans Holmberg 
> > > Signed-off-by: Ajay Joshi 
> > > Signed-off-by: Chaitanya Kulkarni 
> > > Signed-off-by: Matias Bjorling 
> > > Signed-off-by: Aravind Ramesh 
> > > Signed-off-by: Shin'ichiro Kawasaki 
> > > Signed-off-by: Adam Manzanares 
> > > Signed-off-by: Dmitry Fomichev 
> > > ---
> > >  block/nvme.c |   2 +-
> > >  hw/block/nvme-ns.c   | 185 -
> > >  hw/block/nvme-ns.h   |   6 +-
> > >  hw/block/nvme.c  | 872 +--
> > >  include/block/nvme.h |   6 +-
> > >  5 files changed, 1033 insertions(+), 38 deletions(-)
> > > 
> > > diff --git a/block/nvme.c b/block/nvme.c
> > > index 05485fdd11..7a513c9a17 100644
> > > --- a/block/nvme.c
> > > +++ b/block/nvme.c

(snip)

> > 
> > Please read my comment on nvme_identify_nslist_csi() before reading
> > this comment.
> > 
> > At least for this function, the specification is clear:
> > 
> > "If the host requests a data structure for an I/O Command Set that the
> > controller does not support, the controller shall abort the command with
> > a status of Invalid Field in Command."
> > 
> > If the controller supports the I/O command set == if the Command Set bit
> > is set in the data struct returned by the nvme_identify_cmd_set(),
> > so here we should do something like:
> > 
> > } else if (->csi == NVME_CSI_ZONED && ctrl_has_zns_namespaces()) {
> > ...
> > }
> > 
> 
> With this commit, the controller supports ZNS command set regardless of
> the number of attached ZNS namespaces. It could be zero, but the controller
> still supports it. I think it would be better not to change the behavior
> of this command to depend on whether there are any ZNS namespaces added
> or not.

Ok, always having ZNS Command Set support, regardless if a user defines
a zoned namespace on the QEMU command line or not, does simplify things.

But then in nvme_identify_cmd_set(), you need to call
NVME_SET_CSI(*list, NVME_CSI_ZONED) unconditionally.

(Right now you loop though all namespaces, and only set the support bit
if you find a zoned namespace.)

> > Like I wrote in my review comment in the patch that added support for the 
> > new
> > allocated CNS values, I prefer if we remove this for-loop completely, and
> > simply set attached = true in nvme_ns_setup()/nvme_ns_init() instead.
> > 
> > (I was considering if we should set attach = true in nvme_zoned_init_ns(),
> > but because nvme_ns_setup()/nvme_ns_init() is called for all namespaces,
> > including ZNS namespaces, I don't think that any additional code in
> > nvme_zoned_init_ns() is warranted.)
> 
> I think CC.CSS value is not available during namespace setup and if we
> assign active flag in nvme_zoned_ns_setup(), zoned namespaces may end up
> being active even if NVM Only command set is selected. So keeping this loop
> seems like a good idea.

It is true that CC.CSS is not yet available during namespace setup,
but since the controller itself will never detach namespaces based on
CC.CSS, why are we dependant on CC.CSS being available?

Sure, once someone implements namespace management, they will need
to read if a certain namespace is attached or detached from some
persistent state, perhaps in the zone meta-data file, and set
attached boolean in nvme_ns_init() accordingly, but I still don't see
any dependance on CC.CSS even when namespace management is implemented.



Kind regards,
Niklas


Re: [PATCH v5 10/10] migration: introduce snapshot-{save,load,delete} QMP commands

2020-10-05 Thread Daniel P . Berrangé
On Fri, Oct 02, 2020 at 02:46:07PM -0500, Eric Blake wrote:
> On 10/2/20 11:27 AM, Daniel P. Berrangé wrote:
> > savevm, loadvm and delvm are some of the few HMP commands that have never
> > been converted to use QMP. The reasons for the lack of conversion are
> > that they blocked execution of the event thread, and the semantics
> > around choice of disks were ill-defined.
> > 
> > Despite this downside, however, libvirt and applications using libvirt
> > have used these commands for as long as QMP has existed, via the
> > "human-monitor-command" passthrough command. IOW, while it is clearly
> > desirable to be able to fix the problems, they are not a blocker to
> > all real world usage.
> > 
> > Meanwhile there is a need for other features which involve adding new
> > parameters to the commands. This is possible with HMP passthrough, but
> > it provides no reliable way for apps to introspect features, so using
> > QAPI modelling is highly desirable.
> > 
> > This patch thus introduces new snapshot-{load,save,delete} commands to
> > QMP that are intended to replace the old HMP counterparts. The new
> > commands are given different names, because they will be using the new
> > QEMU job framework and thus will have diverging behaviour from the HMP
> > originals. It would thus be misleading to keep the same name.
> > 
> > While this design uses the generic job framework, the current impl is
> > still blocking. The intention that the blocking problem is fixed later.
> > None the less applications using these new commands should assume that
> > they are asynchronous and thus wait for the job status change event to
> > indicate completion.
> > 
> > In addition to using the job framework, the new commands require the
> > caller to be explicit about all the block device nodes used in the
> > snapshot operations, with no built-in default heuristics in use.
> > 
> > Signed-off-by: Daniel P. Berrangé 
> > ---

> > +++ b/tests/qemu-iotests/group
> > @@ -291,6 +291,7 @@
> >  277 rw quick
> >  279 rw backing quick
> >  280 rw migration quick
> > +310 rw quick
> >  281 rw quick
> >  282 rw img quick
> >  283 auto quick
> 
> What's wrong with sorted order? I get the renumbering to appease a merge
> conflict, but it also requires rearrangement ;)

This file is a conflict magnet when rebasing to git master if you just
append to the end of it. So I picked a number many bigger than current
max, and stuffed the new entry in the middle to avoid rebase conflicts.



Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v5 10/10] migration: introduce snapshot-{save, load, delete} QMP commands

2020-10-05 Thread Daniel P . Berrangé
On Mon, Oct 05, 2020 at 09:26:54AM +0200, Markus Armbruster wrote:
> Eric Blake  writes:
> 
> > On 10/2/20 11:27 AM, Daniel P. Berrangé wrote:
> >
> > Do we have a query- command handy to easily learn which snapshot names
> > are even available to attempt deletion on?  If not, that's worth a
> > separate patch.
> 
> Oh, I missed that one.  It's the QMP equivalent to "info snapshots", and
> it is required to finish the job.  Since we're at v5, I'd be okay with a
> follow-up patch, as long as it is done for 5.2.

"query-named-block-nodes" returns BlockDeviceInfo struct, which
contains ImageInfo which contains an array of SnapshotInfo. So
we don't need any new query command.


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v5 06/14] hw/block/nvme: Add support for active/inactive namespaces

2020-10-05 Thread Niklas Cassel
On Sun, Oct 04, 2020 at 11:54:13PM +, Dmitry Fomichev wrote:
> On Wed, 2020-09-30 at 13:50 +, Niklas Cassel wrote:
> > On Mon, Sep 28, 2020 at 11:35:20AM +0900, Dmitry Fomichev wrote:
> > > From: Niklas Cassel 
> > > 
> > > In NVMe, a namespace is active if it exists and is attached to the
> > > controller.
> > > 
> > > CAP.CSS (together with the I/O Command Set data structure) defines what
> > > command sets are supported by the controller.
> > > 
> > > CC.CSS (together with Set Profile) can be set to enable a subset of the
> > > available command sets. The namespaces belonging to a disabled command set
> > > will not be able to attach to the controller, and will thus be inactive.
> > > 
> > > E.g., if the user sets CC.CSS to Admin Only, NVM namespaces should be
> > > marked as inactive.
> > > 
> > > The identify namespace, the identify namespace CSI specific, and the 
> > > namespace
> > > list commands have two different versions, one that only shows active
> > > namespaces, and the other version that shows existing namespaces, 
> > > regardless
> > > of whether the namespace is attached or not.
> > > 
> > > Add an attached member to struct NvmeNamespace, and implement the missing 
> > > CNS
> > > commands.
> > > 
> > > The added functionality will also simplify the implementation of namespace
> > > management in the future, since namespace management can also attach and
> > > detach namespaces.
> > 
> > Following my previous discussion with Klaus,
> > I think we need to rewrite this commit message completely:
> > 
> > Subject: hw/block/nvme: Add support for allocated CNS command variants
> > 
> > Many CNS commands have "allocated" command variants.
> > These includes a namespace as long as it is allocated
> > (i.e. a namespace is included regardless if it is active (attached)
> > or not.)
> > 
> > While these commands are optional (they are mandatory for controllers
> > supporting the namespace attachment command), our QEMU implementation
> > is more complete by actually providing support for these CNS values.
> > 
> > However, since our QEMU model currently does not support the namespace
> > attachment command, these new allocated CNS commands will return the same
> > result as the active CNS command variants.
> > 
> > In NVMe, a namespace is active if it exists and is attached to the
> > controller.
> > 
> > CAP.CSS (together with the I/O Command Set data structure) defines what
> > command sets are supported by the controller.
> > 
> > CC.CSS (together with Set Profile) can be set to enable a subset of the
> > available command sets.
> > 
> > Even if a user configures CC.CSS to e.g. Admin only, NVM namespaces
> > will still be attached (and thus marked as active).
> > Similarly, if a user configures CC.CSS to e.g. NVM, ZNS namespaces
> > will still be attached (and thus marked as active).
> > 
> > However, any operation from a disabled command set will result in a
> > Invalid Command Opcode.
> > 
> > Add an attached struct member for struct NvmeNamespace,
> > so that we lay the foundation for namespace attachment
> > support. Also implement logic in the new CNS values to
> > include/exclude namespaces based on this new struct member.
> > The only thing missing hooking up the actual Namespace Attachment
> > command opcode, which allows a user to toggle the attached
> > variable per namespace. The reason for not hooking up this
> > command completely is because the NVMe specification
> > requires that the namespace managment command is supported
> > if the namespacement attachment command is supported.
> > 
> 

(snip)

> > > @@ -2276,6 +2304,22 @@ static int nvme_start_ctrl(NvmeCtrl *n)
> > >  nvme_init_sq(>admin_sq, n, n->bar.asq, 0, 0,
> > >   NVME_AQA_ASQS(n->bar.aqa) + 1);
> > >  
> > > +for (i = 1; i <= n->num_namespaces; i++) {
> > > +ns = nvme_ns(n, i);
> > > +if (!ns) {
> > > +continue;
> > > +}
> > > +ns->params.attached = false;
> > > +switch (ns->params.csi) {
> > > +case NVME_CSI_NVM:
> > > +if (NVME_CC_CSS(n->bar.cc) == CSS_NVM_ONLY ||
> > > +NVME_CC_CSS(n->bar.cc) == CSS_CSI) {
> > > +ns->params.attached = true;
> > > +}
> > > +break;
> > > +}
> > > +}
> > > +
> > 
> > Considering that the controller doesn't attach/detach
> > namespaces belonging to command sets that it doesn't
> > support, I think a nicer way is to remove this for-loop,
> > and instead, in nvme_ns_setup() or nvme_ns_init(),
> > always set attached = true. (Since we currently don't
> > support namespace attachment command).
> > 
> > The person that implements the last piece of namespace
> > management and namespace attachment will have to deal
> > with reading "attached" from some kind of persistent state
> 
> 
> I did some spec reading on this topic and it seems that
> this logic is necessary precisely because there is no
> attach/detach command 

Re: [RFC PATCH 0/3] qom: Fix missing interface in qemu-storage-daemon

2020-10-05 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/20201005105442.2093105-1-phi...@redhat.com/



Hi,

This series failed build test on FreeBSD host. Please find the details below.

=== TEST SCRIPT BEGIN ===
#!/bin/bash
# Testing script will be invoked under the git checkout with
# HEAD pointing to a commit that has the patches applied on top of "base"
# branch
if qemu-system-x86_64 --help >/dev/null 2>&1; then
  QEMU=qemu-system-x86_64
elif /usr/libexec/qemu-kvm --help >/dev/null 2>&1; then
  QEMU=/usr/libexec/qemu-kvm
else
  exit 1
fi
make vm-build-freebsd J=21 QEMU=$QEMU
exit 0
=== TEST SCRIPT END ===




The full log is available at
http://patchew.org/logs/20201005105442.2093105-1-phi...@redhat.com/testing.FreeBSD/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[RFC PATCH 2/3] tests: Only build socket_scm_helper when a softmmu target is available

2020-10-05 Thread Philippe Mathieu-Daudé
Do not try to build socket_scm_helper if not softmmu target
is available. This fixes:

  $ make check-block
  Generating qemu-version.h with a meson_exe.py custom command
  make: *** No rule to make target 'tests/qemu-iotests/socket_scm_helper', 
needed by 'check-block'.  Stop.

Signed-off-by: Philippe Mathieu-Daudé 
---
 tests/Makefile.include | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/Makefile.include b/tests/Makefile.include
index 40d909badc..d25560 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -136,7 +136,9 @@ check-acceptance: check-venv $(TESTS_RESULTS_DIR) 
get-vm-images
 check:
 
 ifeq ($(CONFIG_TOOLS)$(CONFIG_POSIX),yy)
+ifneq ($(TARGET_DIRS),)
 QEMU_IOTESTS_HELPERS-$(CONFIG_LINUX) = 
tests/qemu-iotests/socket_scm_helper$(EXESUF)
+endif
 check: check-block
 check-block: $(SRC_PATH)/tests/check-block.sh qemu-img$(EXESUF) \
qemu-io$(EXESUF) qemu-nbd$(EXESUF) $(QEMU_IOTESTS_HELPERS-y) \
-- 
2.26.2




[RFC PATCH 0/3] qom: Fix missing interface in qemu-storage-daemon

2020-10-05 Thread Philippe Mathieu-Daudé
Attempt to fix the issue reported by Kevin.

Introduce a trivial test, but there is currently no
framework in place to test qemu-storage-daemon.

There might be better fix to this QOM issue,
I went for the easiest one I could figure out.

Philippe Mathieu-Daudé (3):
  qom: Always register FW_CFG_DATA_GENERATOR_INTERFACE
  tests: Only build socket_scm_helper when a softmmu target is available
  tests: Add a trivial qemu-storage-daemon test

 hw/nvram/fw_cfg.c|  7 ---
 qom/fw_cfg_interface.c   | 15 +++
 MAINTAINERS  |  1 +
 qom/meson.build  |  5 +
 tests/Makefile.include   |  5 +
 tests/qemu-storage-daemon.sh | 10 ++
 6 files changed, 36 insertions(+), 7 deletions(-)
 create mode 100644 qom/fw_cfg_interface.c
 create mode 100755 tests/qemu-storage-daemon.sh

-- 
2.26.2




[RFC PATCH 3/3] tests: Add a trivial qemu-storage-daemon test

2020-10-05 Thread Philippe Mathieu-Daudé
This test fails on top of commit 69699f3055
("crypto/tls-cipher-suites: Produce fw_cfg consumable blob")
because the TYPE_FW_CFG_DATA_GENERATOR_INTERFACE registered
in hw/nvram/fw_cfg.c is not linked into qemu-storage-daemon:

  $ make check-block
  Generating qemu-version.h with a meson_exe.py custom command
qemu-storage-daemon
  tests/qemu-storage-daemon.sh: line 10: 2089929 Aborted (core 
dumped)

Signed-off-by: Philippe Mathieu-Daudé 
---
 tests/Makefile.include   |  3 +++
 tests/qemu-storage-daemon.sh | 10 ++
 2 files changed, 13 insertions(+)
 create mode 100755 tests/qemu-storage-daemon.sh

diff --git a/tests/Makefile.include b/tests/Makefile.include
index d25560..be12581c77 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -142,7 +142,10 @@ endif
 check: check-block
 check-block: $(SRC_PATH)/tests/check-block.sh qemu-img$(EXESUF) \
qemu-io$(EXESUF) qemu-nbd$(EXESUF) $(QEMU_IOTESTS_HELPERS-y) \
+   storage-daemon/qemu-storage-daemon \
$(patsubst %-softmmu,qemu-system-%,$(filter 
%-softmmu,$(TARGET_DIRS)))
+   $(call quiet-command, \
+   $(SRC_PATH)/tests/qemu-storage-daemon.sh, 
"qemu-storage-daemon")
@$<
 endif
 
diff --git a/tests/qemu-storage-daemon.sh b/tests/qemu-storage-daemon.sh
new file mode 100755
index 00..9fd4c73400
--- /dev/null
+++ b/tests/qemu-storage-daemon.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+# Test all QOM dependencies are resolved
+storage-daemon/qemu-storage-daemon \
+  --chardev stdio,id=qmp0  --monitor qmp0 \
+  > /dev/null << 'EOF'
+{"execute": "qmp_capabilities"}
+{"execute": "qom-list-types"}
+{"execute": "quit"}
+EOF
-- 
2.26.2




[RFC PATCH 1/3] qom: Always register FW_CFG_DATA_GENERATOR_INTERFACE

2020-10-05 Thread Philippe Mathieu-Daudé
While the FW_CFG_DATA_GENERATOR_INTERFACE is only consumed
by a device only available using system-mode (fw_cfg), it is
implemented by a crypto component (tls-cipher-suites) which
is always available when crypto is used.

Commit 69699f3055 introduced the following error in the
qemu-storage-daemon binary:

  $ echo -e \
'{"execute": "qmp_capabilities"}\r\n{"execute": 
"qom-list-types"}\r\n{"execute": "quit"}\r\n' \
| storage-daemon/qemu-storage-daemon --chardev stdio,id=qmp0  --monitor qmp0
  {"QMP": {"version": {"qemu": {"micro": 50, "minor": 1, "major": 5}, 
"package": ""}, "capabilities": ["oob"]}}
  {"return": {}}
  missing interface 'fw_cfg-data-generator' for object 'tls-creds'
  Aborted (core dumped)

Since QOM dependencies are resolved at runtime, this issue
could not be triggered at linktime, and we don't have test
running the qemu-storage-daemon binary.

Fix by always registering the QOM interface.

Reported-by: Kevin Wolf 
Fixes: 69699f3055 ("crypto/tls-cipher-suites: Produce fw_cfg consumable blob")
Signed-off-by: Philippe Mathieu-Daudé 
---
I first used:

+if config_host.has_key('CONFIG_GNUTLS') or have_system
+  qom_ss.add(files('fw_cfg_interface.c'))
+endif

but then realized anything could implement a QOM interface,
so better keep this generic.
---
 hw/nvram/fw_cfg.c  |  7 ---
 qom/fw_cfg_interface.c | 15 +++
 MAINTAINERS|  1 +
 qom/meson.build|  5 +
 4 files changed, 21 insertions(+), 7 deletions(-)
 create mode 100644 qom/fw_cfg_interface.c

diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 0e95d057fd..08539a1aab 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -1360,18 +1360,11 @@ static const TypeInfo fw_cfg_mem_info = {
 .class_init= fw_cfg_mem_class_init,
 };
 
-static const TypeInfo fw_cfg_data_generator_interface_info = {
-.parent = TYPE_INTERFACE,
-.name = TYPE_FW_CFG_DATA_GENERATOR_INTERFACE,
-.class_size = sizeof(FWCfgDataGeneratorClass),
-};
-
 static void fw_cfg_register_types(void)
 {
 type_register_static(_cfg_info);
 type_register_static(_cfg_io_info);
 type_register_static(_cfg_mem_info);
-type_register_static(_cfg_data_generator_interface_info);
 }
 
 type_init(fw_cfg_register_types)
diff --git a/qom/fw_cfg_interface.c b/qom/fw_cfg_interface.c
new file mode 100644
index 00..2b19502ffe
--- /dev/null
+++ b/qom/fw_cfg_interface.c
@@ -0,0 +1,15 @@
+#include "qemu/osdep.h"
+#include "hw/nvram/fw_cfg.h"
+
+static const TypeInfo fw_cfg_data_generator_interface_info = {
+.parent = TYPE_INTERFACE,
+.name = TYPE_FW_CFG_DATA_GENERATOR_INTERFACE,
+.class_size = sizeof(FWCfgDataGeneratorClass),
+};
+
+static void fw_cfg_register_types(void)
+{
+type_register_static(_cfg_data_generator_interface_info);
+}
+
+type_init(fw_cfg_register_types)
diff --git a/MAINTAINERS b/MAINTAINERS
index b76fb31861..9c89d54b41 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2055,6 +2055,7 @@ R: Gerd Hoffmann 
 S: Supported
 F: docs/specs/fw_cfg.txt
 F: hw/nvram/fw_cfg.c
+F: qom/fw_cfg_interface.c
 F: stubs/fw_cfg.c
 F: include/hw/nvram/fw_cfg.h
 F: include/standard-headers/linux/qemu_fw_cfg.h
diff --git a/qom/meson.build b/qom/meson.build
index a1cd03c82c..7335f8c8a2 100644
--- a/qom/meson.build
+++ b/qom/meson.build
@@ -7,6 +7,11 @@ qom_ss.add(files(
   'qom-qobject.c',
 ))
 
+# interfaces any object might implement
+qom_ss.add(files(
+  'fw_cfg_interface.c',
+))
+
 qmp_ss.add(files('qom-qmp-cmds.c'))
 softmmu_ss.add(files('qom-hmp-cmds.c'))
 
-- 
2.26.2




Re: [PULL 5/5] crypto/tls-cipher-suites: Produce fw_cfg consumable blob

2020-10-05 Thread Philippe Mathieu-Daudé
Hi Laszlo,

On 10/1/20 9:18 AM, Laszlo Ersek wrote:
> On 09/29/20 17:46, Kevin Wolf wrote:
>> Am 04.07.2020 um 18:39 hat Philippe Mathieu-Daudé geschrieben:
>>> Since our format is consumable by the fw_cfg device,
>>> we can implement the FW_CFG_DATA_GENERATOR interface.
>>>
>>> Example of use to dump the cipher suites (if tracing enabled):
>>>
>>>   $ qemu-system-x86_64 -S \
>>> -object tls-cipher-suites,id=mysuite1,priority=@SYSTEM \
>>> -fw_cfg name=etc/path/to/ciphers,gen_id=mysuite1 \
>>> -trace qcrypto\*
>>>   159066.197123:qcrypto_tls_cipher_suite_priority priority: @SYSTEM
>>>   159066.197219:qcrypto_tls_cipher_suite_info data=[0x13,0x02] 
>>> version=TLS1.3 name=TLS_AES_256_GCM_SHA384
>>>   159066.197228:qcrypto_tls_cipher_suite_info data=[0x13,0x03] 
>>> version=TLS1.3 name=TLS_CHACHA20_POLY1305_SHA256
>>>   159066.197233:qcrypto_tls_cipher_suite_info data=[0x13,0x01] 
>>> version=TLS1.3 name=TLS_AES_128_GCM_SHA256
>>>   159066.197236:qcrypto_tls_cipher_suite_info data=[0x13,0x04] 
>>> version=TLS1.3 name=TLS_AES_128_CCM_SHA256
>>>   159066.197240:qcrypto_tls_cipher_suite_info data=[0xc0,0x30] 
>>> version=TLS1.2 name=TLS_ECDHE_RSA_AES_256_GCM_SHA384
>>>   159066.197245:qcrypto_tls_cipher_suite_info data=[0xcc,0xa8] 
>>> version=TLS1.2 name=TLS_ECDHE_RSA_CHACHA20_POLY1305
>>>   159066.197250:qcrypto_tls_cipher_suite_info data=[0xc0,0x14] 
>>> version=TLS1.0 name=TLS_ECDHE_RSA_AES_256_CBC_SHA1
>>>   159066.197254:qcrypto_tls_cipher_suite_info data=[0xc0,0x2f] 
>>> version=TLS1.2 name=TLS_ECDHE_RSA_AES_128_GCM_SHA256
>>>   159066.197258:qcrypto_tls_cipher_suite_info data=[0xc0,0x13] 
>>> version=TLS1.0 name=TLS_ECDHE_RSA_AES_128_CBC_SHA1
>>>   159066.197261:qcrypto_tls_cipher_suite_info data=[0xc0,0x2c] 
>>> version=TLS1.2 name=TLS_ECDHE_ECDSA_AES_256_GCM_SHA384
>>>   159066.197266:qcrypto_tls_cipher_suite_info data=[0xcc,0xa9] 
>>> version=TLS1.2 name=TLS_ECDHE_ECDSA_CHACHA20_POLY1305
>>>   159066.197270:qcrypto_tls_cipher_suite_info data=[0xc0,0xad] 
>>> version=TLS1.2 name=TLS_ECDHE_ECDSA_AES_256_CCM
>>>   159066.197274:qcrypto_tls_cipher_suite_info data=[0xc0,0x0a] 
>>> version=TLS1.0 name=TLS_ECDHE_ECDSA_AES_256_CBC_SHA1
>>>   159066.197278:qcrypto_tls_cipher_suite_info data=[0xc0,0x2b] 
>>> version=TLS1.2 name=TLS_ECDHE_ECDSA_AES_128_GCM_SHA256
>>>   159066.197283:qcrypto_tls_cipher_suite_info data=[0xc0,0xac] 
>>> version=TLS1.2 name=TLS_ECDHE_ECDSA_AES_128_CCM
>>>   159066.197287:qcrypto_tls_cipher_suite_info data=[0xc0,0x09] 
>>> version=TLS1.0 name=TLS_ECDHE_ECDSA_AES_128_CBC_SHA1
>>>   159066.197291:qcrypto_tls_cipher_suite_info data=[0x00,0x9d] 
>>> version=TLS1.2 name=TLS_RSA_AES_256_GCM_SHA384
>>>   159066.197296:qcrypto_tls_cipher_suite_info data=[0xc0,0x9d] 
>>> version=TLS1.2 name=TLS_RSA_AES_256_CCM
>>>   159066.197300:qcrypto_tls_cipher_suite_info data=[0x00,0x35] 
>>> version=TLS1.0 name=TLS_RSA_AES_256_CBC_SHA1
>>>   159066.197304:qcrypto_tls_cipher_suite_info data=[0x00,0x9c] 
>>> version=TLS1.2 name=TLS_RSA_AES_128_GCM_SHA256
>>>   159066.197308:qcrypto_tls_cipher_suite_info data=[0xc0,0x9c] 
>>> version=TLS1.2 name=TLS_RSA_AES_128_CCM
>>>   159066.197312:qcrypto_tls_cipher_suite_info data=[0x00,0x2f] 
>>> version=TLS1.0 name=TLS_RSA_AES_128_CBC_SHA1
>>>   159066.197316:qcrypto_tls_cipher_suite_info data=[0x00,0x9f] 
>>> version=TLS1.2 name=TLS_DHE_RSA_AES_256_GCM_SHA384
>>>   159066.197320:qcrypto_tls_cipher_suite_info data=[0xcc,0xaa] 
>>> version=TLS1.2 name=TLS_DHE_RSA_CHACHA20_POLY1305
>>>   159066.197325:qcrypto_tls_cipher_suite_info data=[0xc0,0x9f] 
>>> version=TLS1.2 name=TLS_DHE_RSA_AES_256_CCM
>>>   159066.197329:qcrypto_tls_cipher_suite_info data=[0x00,0x39] 
>>> version=TLS1.0 name=TLS_DHE_RSA_AES_256_CBC_SHA1
>>>   159066.197333:qcrypto_tls_cipher_suite_info data=[0x00,0x9e] 
>>> version=TLS1.2 name=TLS_DHE_RSA_AES_128_GCM_SHA256
>>>   159066.197337:qcrypto_tls_cipher_suite_info data=[0xc0,0x9e] 
>>> version=TLS1.2 name=TLS_DHE_RSA_AES_128_CCM
>>>   159066.197341:qcrypto_tls_cipher_suite_info data=[0x00,0x33] 
>>> version=TLS1.0 name=TLS_DHE_RSA_AES_128_CBC_SHA1
>>>   159066.197345:qcrypto_tls_cipher_suite_count count: 29
>>>
>>> Signed-off-by: Philippe Mathieu-Daudé 
>>> Reviewed-by: Daniel P. Berrangé 
>>> Acked-by: Laszlo Ersek 
>>> Message-Id: <20200623172726.21040-6-phi...@redhat.com>
>>
>> I noticed only now that this breaks '--object help' in
>> qemu-storage-daemon:
>>
>> $ qemu-storage-daemon --object help
>> List of user creatable objects:
>> qemu-storage-daemon: missing interface 'fw_cfg-data-generator' for object 
>> 'tls-creds'
>> Aborted (core dumped)
>>
>> The reason is that we don't (and can't) link hw/nvram/fw_cfg.c into the
>> storage daemon because it requires other system emulator stuff.
> 
> Ouch. I've been completely oblivious to "--object help" and how it
> affects qemu-storage-daemon. Sorry about that.
> 

Re: [PATCH v2] sd: Exhibit support for CMD23

2020-10-05 Thread Philippe Mathieu-Daudé
Hi Sai,

On 9/16/20 7:51 PM, Sai Pavan Boddu wrote:
> Update 'SCR.CMD_SUPPORT' register with support of CMD23.
> 
> Signed-off-by: Sai Pavan Boddu 
> Reported-by: Rahul Thati 
> ---
> Changes for V2:
>   Fix commit message
> 
>  hw/sd/sd.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/hw/sd/sd.c b/hw/sd/sd.c
> index 0012882..16d1b61 100644
> --- a/hw/sd/sd.c
> +++ b/hw/sd/sd.c
> @@ -335,7 +335,7 @@ static void sd_set_scr(SDState *sd)
>  if (sd->spec_version >= SD_PHY_SPECv3_01_VERS) {
>  sd->scr[2] |= 1 << 7;   /* Spec Version 3.0X */
>  }
> -sd->scr[3] = 0x00;
> +sd->scr[3] = 0x2;   /* CMD23 supported */

You need to check:
- spec_version >= SD_PHY_SPECv3_01_VERS
- sd->size > SDSC_MAX_CAPACITY

Then you should also update the TRAN_SPEED value in sd_set_csd().

>  /* reserved for manufacturer usage */
>  sd->scr[4] = 0x00;
>  sd->scr[5] = 0x00;
> 



Re: [PATCH v5 10/10] migration: introduce snapshot-{save, load, delete} QMP commands

2020-10-05 Thread Markus Armbruster
Eric Blake  writes:

> On 10/2/20 11:27 AM, Daniel P. Berrangé wrote:
>> savevm, loadvm and delvm are some of the few HMP commands that have never
>> been converted to use QMP. The reasons for the lack of conversion are
>> that they blocked execution of the event thread, and the semantics
>> around choice of disks were ill-defined.
>> 
>> Despite this downside, however, libvirt and applications using libvirt
>> have used these commands for as long as QMP has existed, via the
>> "human-monitor-command" passthrough command. IOW, while it is clearly
>> desirable to be able to fix the problems, they are not a blocker to
>> all real world usage.
>> 
>> Meanwhile there is a need for other features which involve adding new
>> parameters to the commands. This is possible with HMP passthrough, but
>> it provides no reliable way for apps to introspect features, so using
>> QAPI modelling is highly desirable.
>> 
>> This patch thus introduces new snapshot-{load,save,delete} commands to
>> QMP that are intended to replace the old HMP counterparts. The new
>> commands are given different names, because they will be using the new
>> QEMU job framework and thus will have diverging behaviour from the HMP
>> originals. It would thus be misleading to keep the same name.
>> 
>> While this design uses the generic job framework, the current impl is
>> still blocking. The intention that the blocking problem is fixed later.
>> None the less applications using these new commands should assume that
>> they are asynchronous and thus wait for the job status change event to
>> indicate completion.
>> 
>> In addition to using the job framework, the new commands require the
>> caller to be explicit about all the block device nodes used in the
>> snapshot operations, with no built-in default heuristics in use.
>> 
>> Signed-off-by: Daniel P. Berrangé 
>> ---
>
>> +++ b/qapi/job.json
>> @@ -22,10 +22,17 @@
>>  #
>>  # @amend: image options amend job type, see "x-blockdev-amend" (since 5.1)
>>  #
>> +# @snapshot-load: snapshot load job type, see "snapshot-load" (since 5.2)
>> +#
>> +# @snapshot-save: snapshot save job type, see "snapshot-save" (since 5.2)
>> +#
>> +# @snapshot-delete: snapshot delete job type, see "snapshot-delete" (since 
>> 5.2)
>> +#
>>  # Since: 1.7
>>  ##
>>  { 'enum': 'JobType',
>> -  'data': ['commit', 'stream', 'mirror', 'backup', 'create', 'amend'] }
>> +  'data': ['commit', 'stream', 'mirror', 'backup', 'create', 'amend',
>> +   'snapshot-load', 'snapshot-save', 'snapshot-delete'] }
>>  
>>  ##
>>  # @JobStatus:
>> diff --git a/qapi/migration.json b/qapi/migration.json
>> index 7f5e6fd681..d2bd551ad9 100644
>> --- a/qapi/migration.json
>> +++ b/qapi/migration.json
>> @@ -1787,3 +1787,123 @@
>>  # Since: 5.2
>>  ##
>>  { 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' }
>> +
>> +##
>> +# @snapshot-save:
>> +#
>> +# Save a VM snapshot
>> +#
>> +# @job-id: identifier for the newly created job
>> +# @tag: name of the snapshot to create
>> +# @devices: list of block device node names to save a snapshot to
>> +# @vmstate: block device node name to save vmstate to
>
> Here, you document vmstate last,...
>
>> +#
>> +# Applications should not assume that the snapshot save is complete
>> +# when this command returns. The job commands / events must be used
>> +# to determine completion and to fetch details of any errors that arise.
>> +#
>> +# Note that the VM CPUs will be paused during the time it takes to
>> +# save the snapshot

End the sentence with a period, please.

> "will be", or "may be"?  As you stated above, we may be able to lift the
> synchronous limitations down the road, while still maintaining the
> present interface of using this command to start the job and waiting on
> the job id until it is finished, at which point the CPUs might not need
> to be paused as much.

Could also add a sentence like "This may change in the future".

>> +#
>> +# It is strongly recommended that @devices contain all writable
>> +# block device nodes if a consistent snapshot is required.
>> +#
>> +# If @tag already exists, an error will be reported
>> +#
>> +# Returns: nothing
>> +#
>> +# Example:
>> +#
>> +# -> { "execute": "snapshot-save",
>> +#  "data": {
>> +# "job-id": "snapsave0",
>> +# "tag": "my-snap",
>> +# "vmstate": "disk0",
>> +# "devices": ["disk0", "disk1"]
>
> ...here vmstate occurs before devices.  I don't know if our doc
> generator cares about inconsistent ordering.

It does not.  It's questionable style, though.  Easy enough to tidy up.

>> +#  }
>> +#}
>> +# <- { "return": { } }
>> +#
>> +# Since: 5.2
>> +##
>> +{ 'command': 'snapshot-save',
>> +  'data': { 'job-id': 'str',
>> +'tag': 'str',
>> +'vmstate': 'str',
>> +'devices': ['str'] } }
>> +
>> +##
>> +# @snapshot-load:
>> +#
>> +# Load a VM snapshot
>> +#
>> +# @job-id: identifier for the newly created job
>> +# @tag: name of the snapshot to load.
>> +# 

Re: [PATCH 1/4] vmdk: fix maybe uninitialized warnings

2020-10-05 Thread Christian Borntraeger
On 30.09.20 18:36, Fam Zheng wrote:
> On Wed, 2020-09-30 at 17:58 +0200, Christian Borntraeger wrote:
>> Fedora 32 gcc 10 seems to give false positives:
>>
>> Compiling C object libblock.fa.p/block_vmdk.c.o
>> ../block/vmdk.c: In function ‘vmdk_parse_extents’:
>> ../block/vmdk.c:587:5: error: ‘extent’ may be used uninitialized in
>> this function [-Werror=maybe-uninitialized]
>>   587 | g_free(extent->l1_table);
>>   | ^~~~
>> ../block/vmdk.c:754:17: note: ‘extent’ was declared here
>>   754 | VmdkExtent *extent;
>>   | ^~
>> ../block/vmdk.c:620:11: error: ‘extent’ may be used uninitialized in
>> this function [-Werror=maybe-uninitialized]
>>   620 | ret = vmdk_init_tables(bs, extent, errp);
>>   |   ^~
>> ../block/vmdk.c:598:17: note: ‘extent’ was declared here
>>   598 | VmdkExtent *extent;
>>   | ^~
>> ../block/vmdk.c:1178:39: error: ‘extent’ may be used uninitialized in
>> this function [-Werror=maybe-uninitialized]
>>  1178 | extent->flat_start_offset = flat_offset << 9;
>>   | ~~^~
>> ../block/vmdk.c: In function ‘vmdk_open_vmdk4’:
>> ../block/vmdk.c:581:22: error: ‘extent’ may be used uninitialized in
>> this function [-Werror=maybe-uninitialized]
>>   581 | extent->l2_cache =
>>   | ~^
>>   582 | g_malloc(extent->entry_size * extent->l2_size *
>> L2_CACHE_SIZE);
>>   | ~
>> ~
>> ../block/vmdk.c:872:17: note: ‘extent’ was declared here
>>   872 | VmdkExtent *extent;
>>   | ^~
>> ../block/vmdk.c: In function ‘vmdk_open’:
>> ../block/vmdk.c:620:11: error: ‘extent’ may be used uninitialized in
>> this function [-Werror=maybe-uninitialized]
>>   620 | ret = vmdk_init_tables(bs, extent, errp);
>>   |   ^~
>> ../block/vmdk.c:598:17: note: ‘extent’ was declared here
>>   598 | VmdkExtent *extent;
>>   | ^~
>> cc1: all warnings being treated as errors
>> make: *** [Makefile.ninja:884: libblock.fa.p/block_vmdk.c.o] Error 1
>>
>> fix them by assigning a default value.
>>
>> Signed-off-by: Christian Borntraeger 
>> ---
>>  block/vmdk.c | 8 
>>  1 file changed, 4 insertions(+), 4 deletions(-)
>>
>> diff --git a/block/vmdk.c b/block/vmdk.c
>> index 8ec62c7ab798..a00dc00eb47a 100644
>> --- a/block/vmdk.c
>> +++ b/block/vmdk.c
>> @@ -595,7 +595,7 @@ static int vmdk_open_vmfs_sparse(BlockDriverState
>> *bs,
>>  int ret;
>>  uint32_t magic;
>>  VMDK3Header header;
>> -VmdkExtent *extent;
>> +VmdkExtent *extent = NULL;
>>  
>>  ret = bdrv_pread(file, sizeof(magic), , sizeof(header));
>>  if (ret < 0) {
>> @@ -751,7 +751,7 @@ static int vmdk_open_se_sparse(BlockDriverState
>> *bs,
>>  int ret;
>>  VMDKSESparseConstHeader const_header;
>>  VMDKSESparseVolatileHeader volatile_header;
>> -VmdkExtent *extent;
>> +VmdkExtent *extent = NULL;
>>  
>>  ret = bdrv_apply_auto_read_only(bs,
>>  "No write support for seSparse images available", errp);
>> @@ -869,7 +869,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
>>  uint32_t magic;
>>  uint32_t l1_size, l1_entry_sectors;
>>  VMDK4Header header;
>> -VmdkExtent *extent;
>> +VmdkExtent *extent = NULL;
>>  BDRVVmdkState *s = bs->opaque;
>>  int64_t l1_backup_offset = 0;
>>  bool compressed;
>> @@ -1088,7 +1088,7 @@ static int vmdk_parse_extents(const char *desc,
>> BlockDriverState *bs,
>>  BdrvChild *extent_file;
>>  BdrvChildRole extent_role;
>>  BDRVVmdkState *s = bs->opaque;
>> -VmdkExtent *extent;
>> +VmdkExtent *extent = NULL;
>>  char extent_opt_prefix[32];
>>  Error *local_err = NULL;
>>  
> 
> Looks trivial, and correct.
> 
> Reviewed-by: Fam Zheng 


Will this go via the block or trivial tree (cced). 



Re: [PATCH 2/4] nbd: silence maybe-uninitialized warnings

2020-10-05 Thread Christian Borntraeger
On 30.09.20 19:19, Eric Blake wrote:
> On 9/30/20 10:58 AM, Christian Borntraeger wrote:
>> gcc 10 from Fedora 32 gives me:
>>
>> Compiling C object libblock.fa.p/nbd_server.c.o
>> ../nbd/server.c: In function ‘nbd_co_client_start’:
>> ../nbd/server.c:625:14: error: ‘namelen’ may be used uninitialized in this 
>> function [-Werror=maybe-uninitialized]
>>   625 | rc = nbd_negotiate_send_info(client, NBD_INFO_NAME, namelen, 
>> name,
>>   |  
>> ^
>>   626 |  errp);
>>   |  ~
>> ../nbd/server.c:564:14: note: ‘namelen’ was declared here
>>   564 | uint32_t namelen;
>>   |  ^~~
>> cc1: all warnings being treated as errors
>>
>> As I cannot see how this can happen, let uns silence the warning.
> 
> gcc is smart enough to see that nbd_opt_read_name(... ), which
> is the only use of namelen between declaration and use, does not always
> initialize namelen; but fails to see we also exit this function early in
> the same conditions when nbd_opt_read_name left namelen uninit.  The
> workaround is fine.
> 
> Reviewed-by: Eric Blake 
> 
> I'm happy for this to go in through the trivial tree, but I'll also
> queue it on my NBD tree if that is ready first.

Just in case cc qemu-trival. 



Re: [PATCH 4/4] virtiofsd: avoid false positive compiler warning

2020-10-05 Thread Christian Borntraeger



On 30.09.20 18:27, Dr. David Alan Gilbert wrote:
> * Christian Borntraeger (borntrae...@de.ibm.com) wrote:
>> make: *** [Makefile:121: config-host.mak] Error 1
>> [cborntra@m83lp52 qemu]$ make -C build/
>> make: Entering directory '/home/cborntra/REPOS/qemu/build'
>> Generating qemu-version.h with a meson_exe.py custom command
>> Compiling C object tools/virtiofsd/virtiofsd.p/passthrough_ll.c.o
>> ../tools/virtiofsd/passthrough_ll.c: In function ‘lo_setattr’:
>> ../tools/virtiofsd/passthrough_ll.c:702:19: error: ‘fd’ may be used 
>> uninitialized in this function [-Werror=maybe-uninitialized]
>>   702 | res = futimens(fd, tv);
>>   |   ^~~~
>> cc1: all warnings being treated as errors
>> make: *** [Makefile.ninja:1438: 
>> tools/virtiofsd/virtiofsd.p/passthrough_ll.c.o] Error 1
>> make: Leaving directory '/home/cborntra/REPOS/
>>
>> as far as I can see this can not happen. Let us silence the warning by
>> giving fd a default value.
>>
>> Signed-off-by: Christian Borntraeger 
> 
> Yeh, I'd posted 
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg738783.html
> but not yet merged it; only difference is I'd used -1 since it seemd
> safer to use -1 even if it couldn't happen :-)

Agreed, lets go with your patch.