Re: [PATCH v2 14/20] nvme: allow multiple aios per command

2019-11-25 Thread Beata Michalska
On Thu, 21 Nov 2019 at 11:57, Klaus Birkelund  wrote:
>
> On Tue, Nov 12, 2019 at 03:25:06PM +, Beata Michalska wrote:
> > Hi Klaus,
> >
> > On Tue, 15 Oct 2019 at 11:55, Klaus Jensen  wrote:
> > > @@ -341,19 +344,18 @@ static uint16_t nvme_dma_write_prp(NvmeCtrl *n, 
> > > uint8_t *ptr, uint32_t len,
> > Any reason why nvme_dma_write_prp is missing the changes applied
> > to nvme_dma_read_prp?
> >
>
> This was addressed by proxy through changes to the previous patch
> (by combining the read/write functions).
>
> > > +case NVME_AIO_OPC_WRITE_ZEROES:
> > > +block_acct_start(stats, acct, aio->iov.size, BLOCK_ACCT_WRITE);
> > > +aio->aiocb = blk_aio_pwrite_zeroes(aio->blk, aio->offset,
> > > +aio->iov.size, BDRV_REQ_MAY_UNMAP, nvme_aio_cb, aio);
> > Minor: aio->blk  => blk
> >
>
> Thanks. Fixed this in a couple of other places as well.
>
> > > @@ -621,8 +880,11 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd 
> > > *cmd)
> > >  sq = n->sq[qid];
> > >  while (!QTAILQ_EMPTY(&sq->out_req_list)) {
> > >  req = QTAILQ_FIRST(&sq->out_req_list);
> > > -assert(req->aiocb);
> > > -blk_aio_cancel(req->aiocb);
> > > +while (!QTAILQ_EMPTY(&req->aio_tailq)) {
> > > +aio = QTAILQ_FIRST(&req->aio_tailq);
> > > +assert(aio->aiocb);
> > > +blk_aio_cancel(aio->aiocb);
> > What about releasing the memory associated with a given aio?
>
> I believe the callback is still called when cancelled? That should take
> care of it. Or have I misunderstood that? At least for the DMAAIOCBs it
> is.
>
It seems that the completion callback is supposed to be called.
My bad.

BR
Beata
> > > +struct NvmeAIO {
> > > +NvmeRequest *req;
> > > +
> > > +NvmeAIOOp   opc;
> > > +int64_t offset;
> > > +BlockBackend*blk;
> > > +BlockAIOCB  *aiocb;
> > > +BlockAcctCookie acct;
> > > +
> > > +NvmeAIOCompletionFunc *cb;
> > > +void  *cb_arg;
> > > +
> > > +QEMUSGList   *qsg;
> > > +QEMUIOVector iov;
> >
> > There is a bit of inconsistency in the ownership of IOVs and SGLs.
> > SGLs now seem to be owned by the request whereas IOVs by the aio.
> > Would be good to have that unified or documented at least.
> >
>
> Fixed this. The NvmeAIO only holds pointers now.
>
> > > +#define NVME_REQ_TRANSFER_DMA  0x1
> > This one does not seem to be used 
> >
>
> I have dropped the flags and reverted to a simple req->is_cmb as that is
> all that is really needed.
>



Re: [PATCH v2 14/20] nvme: allow multiple aios per command

2019-11-21 Thread Klaus Birkelund
On Tue, Nov 12, 2019 at 03:25:06PM +, Beata Michalska wrote:
> Hi Klaus,
> 
> On Tue, 15 Oct 2019 at 11:55, Klaus Jensen  wrote:
> > @@ -341,19 +344,18 @@ static uint16_t nvme_dma_write_prp(NvmeCtrl *n, 
> > uint8_t *ptr, uint32_t len,
> Any reason why nvme_dma_write_prp is missing the changes applied
> to nvme_dma_read_prp?
> 

This was addressed by proxy through changes to the previous patch
(by combining the read/write functions).
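
For reference, a rough sketch of what the combined helper in the previous
patch boils down to (the name nvme_dma_prp and the exact shape here are
illustrative, and the CMB path is elided):

    static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
        uint64_t prp1, uint64_t prp2, DMADirection dir, NvmeRequest *req)
    {
        uint16_t status;

        status = nvme_map_prp(n, &req->qsg, prp1, prp2, len, req);
        if (status) {
            return status;
        }

        /* mapping and cleanup are shared; only the copy direction differs */
        if (dir == DMA_DIRECTION_TO_DEVICE) {
            if (unlikely(dma_buf_write(ptr, len, &req->qsg))) {
                status = NVME_INVALID_FIELD | NVME_DNR;
            }
        } else {
            if (unlikely(dma_buf_read(ptr, len, &req->qsg))) {
                status = NVME_INVALID_FIELD | NVME_DNR;
            }
        }

        qemu_sglist_destroy(&req->qsg);

        return status;
    }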

> > +case NVME_AIO_OPC_WRITE_ZEROES:
> > +block_acct_start(stats, acct, aio->iov.size, BLOCK_ACCT_WRITE);
> > +aio->aiocb = blk_aio_pwrite_zeroes(aio->blk, aio->offset,
> > +aio->iov.size, BDRV_REQ_MAY_UNMAP, nvme_aio_cb, aio);
> Minor: aio->blk  => blk
> 

Thanks. Fixed this in a couple of other places as well.

> > @@ -621,8 +880,11 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
> >  sq = n->sq[qid];
> >  while (!QTAILQ_EMPTY(&sq->out_req_list)) {
> >  req = QTAILQ_FIRST(&sq->out_req_list);
> > -assert(req->aiocb);
> > -blk_aio_cancel(req->aiocb);
> > +while (!QTAILQ_EMPTY(&req->aio_tailq)) {
> > +aio = QTAILQ_FIRST(&req->aio_tailq);
> > +assert(aio->aiocb);
> > +blk_aio_cancel(aio->aiocb);
> What about releasing the memory associated with a given aio?

I believe the callback is still called when cancelled? That should take
care of it. Or have I misunderstood that? At least for the DMAAIOCBs it
is.
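
To make that concrete, what I rely on is that the callback runs for
cancelled aios as well and always releases the aio. Roughly (the tailq
entry field name below is approximate):

    static void nvme_aio_cb(void *opaque, int ret)
    {
        NvmeAIO *aio = opaque;
        NvmeRequest *req = aio->req;

        /* the aio is removed from the request in all cases, including
         * cancellation (ret == -ECANCELED) */
        QTAILQ_REMOVE(&req->aio_tailq, aio, tailq_entry);

        if (!ret) {
            block_acct_done(blk_get_stats(aio->blk), &aio->acct);
        } else {
            block_acct_failed(blk_get_stats(aio->blk), &aio->acct);
            req->status = NVME_INTERNAL_DEV_ERROR;
        }

        if (QTAILQ_EMPTY(&req->aio_tailq)) {
            /* last outstanding aio for this command; post the CQE */
            NvmeSQueue *sq = req->sq;
            NvmeCQueue *cq = sq->ctrl->cq[sq->cqid];
            nvme_enqueue_req_completion(cq, req);
        }

        nvme_aio_destroy(aio);
    }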

> > +struct NvmeAIO {
> > +NvmeRequest *req;
> > +
> > +NvmeAIOOp   opc;
> > +int64_t offset;
> > +BlockBackend*blk;
> > +BlockAIOCB  *aiocb;
> > +BlockAcctCookie acct;
> > +
> > +NvmeAIOCompletionFunc *cb;
> > +void  *cb_arg;
> > +
> > +QEMUSGList   *qsg;
> > +QEMUIOVector iov;
> 
> There is a bit of inconsistency in the ownership of IOVs and SGLs.
> SGLs now seem to be owned by the request whereas IOVs by the aio.
> Would be good to have that unified or documented at least.
> 

Fixed this. The NvmeAIO only holds pointers now.
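
That is, for the next version the struct looks roughly like this: the
request owns the QEMUSGList/QEMUIOVector and the aio only references
whichever of the two is in use (field names are approximate):

    struct NvmeAIO {
        NvmeRequest *req;

        NvmeAIOOp       opc;
        int64_t         offset;
        BlockBackend    *blk;
        BlockAIOCB      *aiocb;
        BlockAcctCookie acct;

        NvmeAIOCompletionFunc *cb;
        void                  *cb_arg;

        /* not owned; both point into the NvmeRequest */
        QEMUSGList   *qsg;
        QEMUIOVector *iov;

        /* entry on req->aio_tailq */
        QTAILQ_ENTRY(NvmeAIO) tailq_entry;
    };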

> > +#define NVME_REQ_TRANSFER_DMA  0x1
> This one does not seem to be used 
> 

I have dropped the flags and reverted to a simple req->is_cmb as that is
all that is really needed.
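
That is, back to the v1-style flag with direct field access, roughly:

    typedef struct NvmeRequest {
        /* ... */
        bool is_cmb;    /* PRPs point into the Controller Memory Buffer */
        /* ... */
    } NvmeRequest;

    if (nvme_addr_is_cmb(n, prp1)) {
        req->is_cmb = true;
    }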




Re: [PATCH v2 14/20] nvme: allow multiple aios per command

2019-11-12 Thread Beata Michalska
Hi Klaus,

On Tue, 15 Oct 2019 at 11:55, Klaus Jensen  wrote:
>
> This refactors how the device issues asynchronous block backend
> requests. The NvmeRequest now holds a queue of NvmeAIOs that are
> associated with the command. This allows multiple aios to be issued for
> a command. Only when all requests have been completed will the device
> post a completion queue entry.
>
> Because the device is currently guaranteed to only issue a single aio
> request per command, the benefit is not immediately obvious. But this
> functionality is required to support metadata.
>
> Signed-off-by: Klaus Jensen 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c   | 455 +-
>  hw/block/nvme.h   | 165 ---
>  hw/block/trace-events |   8 +
>  3 files changed, 511 insertions(+), 117 deletions(-)
>
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index cbc0b6a660b6..f4b9bd36a04e 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -25,6 +25,8 @@
>   *  Default: 64
>   *   cmb_size_mb= : Size of Controller Memory Buffer in MBs.
>   *  Default: 0 (disabled)
> + *   mdts= : Maximum Data Transfer Size (power of two)
> + *  Default: 7
>   */
>
>  #include "qemu/osdep.h"
> @@ -56,6 +58,7 @@
>  } while (0)
>
>  static void nvme_process_sq(void *opaque);
> +static void nvme_aio_cb(void *opaque, int ret);
>
>  static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
>  {
> @@ -197,7 +200,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList 
> *qsg, uint64_t prp1,
>  }
>
>  if (nvme_addr_is_cmb(n, prp1)) {
> -req->is_cmb = true;
> +nvme_req_set_cmb(req);
>  }
>
>  pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
> @@ -255,8 +258,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList 
> *qsg, uint64_t prp1,
>  }
>
>  addr_is_cmb = nvme_addr_is_cmb(n, prp_ent);
> -if ((req->is_cmb && !addr_is_cmb) ||
> -(!req->is_cmb && addr_is_cmb)) {
> +if ((nvme_req_is_cmb(req) && !addr_is_cmb) ||
> +(!nvme_req_is_cmb(req) && addr_is_cmb)) {
>  status = NVME_INVALID_USE_OF_CMB | NVME_DNR;
>  goto unmap;
>  }
> @@ -269,8 +272,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList 
> *qsg, uint64_t prp1,
>  }
>  } else {
>  bool addr_is_cmb = nvme_addr_is_cmb(n, prp2);
> -if ((req->is_cmb && !addr_is_cmb) ||
> -(!req->is_cmb && addr_is_cmb)) {
> +if ((nvme_req_is_cmb(req) && !addr_is_cmb) ||
> +(!nvme_req_is_cmb(req) && addr_is_cmb)) {
>  status = NVME_INVALID_USE_OF_CMB | NVME_DNR;
>  goto unmap;
>  }
> @@ -312,7 +315,7 @@ static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t 
> *ptr, uint32_t len,
>  return status;
>  }
>
> -if (req->is_cmb) {
> +if (nvme_req_is_cmb(req)) {
>  QEMUIOVector iov;
>
>  qemu_iovec_init(&iov, qsg.nsg);
> @@ -341,19 +344,18 @@ static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t 
> *ptr, uint32_t len,
Any reason why nvme_dma_write_prp is missing the changes applied
to nvme_dma_read_prp?

>  static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
>  uint64_t prp1, uint64_t prp2, NvmeRequest *req)
>  {
> -QEMUSGList qsg;
>  uint16_t status = NVME_SUCCESS;
>
> -status = nvme_map_prp(n, &qsg, prp1, prp2, len, req);
> +status = nvme_map_prp(n, &req->qsg, prp1, prp2, len, req);
>  if (status) {
>  return status;
>  }
>
> -if (req->is_cmb) {
> +if (nvme_req_is_cmb(req)) {
>  QEMUIOVector iov;
>
> -qemu_iovec_init(&iov, qsg.nsg);
> -dma_to_cmb(n, &qsg, &iov);
> +qemu_iovec_init(&iov, req->qsg.nsg);
> +dma_to_cmb(n, &req->qsg, &iov);
>
>  if (unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) {
>  trace_nvme_err_invalid_dma();
> @@ -365,17 +367,137 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t 
> *ptr, uint32_t len,
>  goto out;
>  }
>
> -if (unlikely(dma_buf_read(ptr, len, &qsg))) {
> +if (unlikely(dma_buf_read(ptr, len, &req->qsg))) {
>  trace_nvme_err_invalid_dma();
>  status = NVME_INVALID_FIELD | NVME_DNR;
>  }
>
>  out:
> -qemu_sglist_destroy(&qsg);
> +qemu_sglist_destroy(&req->qsg);
>
>  return status;
>  }
>
> +static uint16_t nvme_map(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +{
> +NvmeNamespace *ns = req->ns;
> +
> +uint32_t len = req->nlb << nvme_ns_lbads(ns);
> +uint64_t prp1 = le64_to_cpu(cmd->prp1);
> +uint64_t prp2 = le64_to_cpu(cmd->prp2);
> +
> +return nvme_map_prp(n, &req->qsg, prp1, prp2, len, req);
> +}
> +
> +static void nvme_aio_destroy(NvmeAIO *aio)
> +{
> +if (aio->iov.nalloc) {
> +

[PATCH v2 14/20] nvme: allow multiple aios per command

2019-10-15 Thread Klaus Jensen
This refactors how the device issues asynchronous block backend
requests. The NvmeRequest now holds a queue of NvmeAIOs that are
associated with the command. This allows multiple aios to be issued for
a command. Only when all requests have been completed will the device
post a completion queue entry.

Because the device is currently guaranteed to only issue a single aio
request per command, the benefit is not immediately obvious. But this
functionality is required to support metadata.
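
In sketch form (field names other than aio_tailq are illustrative), the
request now carries a tail queue of the aios issued on its behalf, and
nvme_aio_cb only posts the CQE once that queue has drained:

    typedef struct NvmeRequest {
        /* ... */
        QTAILQ_HEAD(, NvmeAIO) aio_tailq;  /* outstanding aios for this cmd */
        /* ... */
    } NvmeRequest;

    /* submission: any number of aios can be attached to one request
     * before the backend requests are issued */
    QTAILQ_INSERT_TAIL(&req->aio_tailq, aio, tailq_entry);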

Signed-off-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c   | 455 +-
 hw/block/nvme.h   | 165 ---
 hw/block/trace-events |   8 +
 3 files changed, 511 insertions(+), 117 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index cbc0b6a660b6..f4b9bd36a04e 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -25,6 +25,8 @@
  *  Default: 64
  *   cmb_size_mb= : Size of Controller Memory Buffer in MBs.
  *  Default: 0 (disabled)
+ *   mdts= : Maximum Data Transfer Size (power of two)
+ *  Default: 7
  */
 
 #include "qemu/osdep.h"
@@ -56,6 +58,7 @@
 } while (0)
 
 static void nvme_process_sq(void *opaque);
+static void nvme_aio_cb(void *opaque, int ret);
 
 static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
 {
@@ -197,7 +200,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, 
uint64_t prp1,
 }
 
 if (nvme_addr_is_cmb(n, prp1)) {
-req->is_cmb = true;
+nvme_req_set_cmb(req);
 }
 
 pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
@@ -255,8 +258,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, 
uint64_t prp1,
 }
 
 addr_is_cmb = nvme_addr_is_cmb(n, prp_ent);
-if ((req->is_cmb && !addr_is_cmb) ||
-(!req->is_cmb && addr_is_cmb)) {
+if ((nvme_req_is_cmb(req) && !addr_is_cmb) ||
+(!nvme_req_is_cmb(req) && addr_is_cmb)) {
 status = NVME_INVALID_USE_OF_CMB | NVME_DNR;
 goto unmap;
 }
@@ -269,8 +272,8 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, 
uint64_t prp1,
 }
 } else {
 bool addr_is_cmb = nvme_addr_is_cmb(n, prp2);
-if ((req->is_cmb && !addr_is_cmb) ||
-(!req->is_cmb && addr_is_cmb)) {
+if ((nvme_req_is_cmb(req) && !addr_is_cmb) ||
+(!nvme_req_is_cmb(req) && addr_is_cmb)) {
 status = NVME_INVALID_USE_OF_CMB | NVME_DNR;
 goto unmap;
 }
@@ -312,7 +315,7 @@ static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t 
*ptr, uint32_t len,
 return status;
 }
 
-if (req->is_cmb) {
+if (nvme_req_is_cmb(req)) {
 QEMUIOVector iov;
 
 qemu_iovec_init(&iov, qsg.nsg);
@@ -341,19 +344,18 @@ static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t 
*ptr, uint32_t len,
 static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
 uint64_t prp1, uint64_t prp2, NvmeRequest *req)
 {
-QEMUSGList qsg;
 uint16_t status = NVME_SUCCESS;
 
-status = nvme_map_prp(n, &qsg, prp1, prp2, len, req);
+status = nvme_map_prp(n, &req->qsg, prp1, prp2, len, req);
 if (status) {
 return status;
 }
 
-if (req->is_cmb) {
+if (nvme_req_is_cmb(req)) {
 QEMUIOVector iov;
 
-qemu_iovec_init(&iov, qsg.nsg);
-dma_to_cmb(n, &qsg, &iov);
+qemu_iovec_init(&iov, req->qsg.nsg);
+dma_to_cmb(n, &req->qsg, &iov);
 
 if (unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) {
 trace_nvme_err_invalid_dma();
@@ -365,17 +367,137 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t 
*ptr, uint32_t len,
 goto out;
 }
 
-if (unlikely(dma_buf_read(ptr, len, &qsg))) {
+if (unlikely(dma_buf_read(ptr, len, &req->qsg))) {
 trace_nvme_err_invalid_dma();
 status = NVME_INVALID_FIELD | NVME_DNR;
 }
 
 out:
-qemu_sglist_destroy(&qsg);
+qemu_sglist_destroy(&req->qsg);
 
 return status;
 }
 
+static uint16_t nvme_map(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+{
+NvmeNamespace *ns = req->ns;
+
+uint32_t len = req->nlb << nvme_ns_lbads(ns);
+uint64_t prp1 = le64_to_cpu(cmd->prp1);
+uint64_t prp2 = le64_to_cpu(cmd->prp2);
+
+return nvme_map_prp(n, &req->qsg, prp1, prp2, len, req);
+}
+
+static void nvme_aio_destroy(NvmeAIO *aio)
+{
+if (aio->iov.nalloc) {
+qemu_iovec_destroy(&aio->iov);
+}
+
+g_free(aio);
+}
+
+static NvmeAIO *nvme_aio_new(BlockBackend *blk, int64_t offset,
+QEMUSGList *qsg, NvmeRequest *req, NvmeAIOCompletionFunc *cb)
+{
+NvmeAIO *aio = g_malloc0(sizeof(*aio));
+
+*aio = (NvmeAIO) {
+.blk = blk,
+.offset = offset,
+.req = req,
+.qsg = qsg,
+.cb = cb,
+};
+
+if (qsg &&