Re: [PATCH v5 16/26] nvme: refactor prp mapping

2020-03-25 Thread Maxim Levitsky
On Mon, 2020-03-16 at 00:51 -0700, Klaus Birkelund Jensen wrote:
> On Feb 12 13:44, Maxim Levitsky wrote:
> > On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> > > Refactor nvme_map_prp and allow PRPs to be located in the CMB. The logic
> > > ensures that if some of the PRP is in the CMB, all of it must be located
> > > there, as per the specification.
> > 
> > To be honest, this looks less like refactoring and more like a bugfix
> > (the old code just assumed that if the first PRP entry is in the CMB, the
> > rest are too).
> 
> I split it out into a separate bugfix patch.
> 
> > > 
> > > Also combine nvme_dma_{read,write}_prp into a single nvme_dma_prp that
> > > takes an additional DMADirection parameter.
> > 
> > To be honest, 'nvme_dma_prp' was not a clear function name to me at first
> > glance. Could you rename it to nvme_dma_prp_rw or something similar?
> > (Although even that does not quite convey that it reads/writes data to/from
> > the guest memory areas defined by the PRP list.)
> > Also, could you split this change into a new patch?
> > 
> 
> Splitting this into a new patch.
> 
> > > 
> > > Signed-off-by: Klaus Jensen 
> > > Signed-off-by: Klaus Jensen 
> > 
> > Now you even use both of your addresses :-)
> > 
> > > ---
> > >  hw/block/nvme.c   | 245 +++---
> > >  hw/block/nvme.h   |   2 +-
> > >  hw/block/trace-events |   1 +
> > >  include/block/nvme.h  |   1 +
> > >  4 files changed, 160 insertions(+), 89 deletions(-)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index 4acfc85b56a2..334265efb21e 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -58,6 +58,11 @@
> > >  
> > >  static void nvme_process_sq(void *opaque);
> > >  
> > > +static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
> > > +{
> > > +return &n->cmbuf[addr - n->ctrl_mem.addr];
> > > +}
> > 
> > To my taste, I would put this together with the patch that added
> > nvme_addr_is_cmb. I know that some people are against this, citing the rule
> > that code should be used in the same patch that adds it. Your call.
> > 
> > Regardless, I also prefer to put refactoring patches first in the series.
Thanks!
> > 
> > > +
> > >  static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
> > >  {
> > >  hwaddr low = n->ctrl_mem.addr;
> > > @@ -152,138 +157,187 @@ static void nvme_irq_deassert(NvmeCtrl *n, 
> > > NvmeCQueue *cq)
> > >  }
> > >  }
> > >  
> > > -static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, 
> > > uint64_t prp1,
> > > - uint64_t prp2, uint32_t len, NvmeCtrl *n)
> > > +static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector 
> > > *iov,
> > > +uint64_t prp1, uint64_t prp2, uint32_t len, NvmeRequest *req)
> > 
> > The split-line alignment is off (it was correct before).
> > Also, while refactoring, it would be great to add some documentation to this
> > and a few more functions, since it is not immediately clear what this does.
> > 
> > 
> > >  {
> > >  hwaddr trans_len = n->page_size - (prp1 % n->page_size);
> > >  trans_len = MIN(len, trans_len);
> > >  int num_prps = (len >> n->page_bits) + 1;
> > > +uint16_t status = NVME_SUCCESS;
> > > +bool is_cmb = false;
> > > +bool prp_list_in_cmb = false;
> > > +
> > > +trace_nvme_dev_map_prp(nvme_cid(req), req->cmd.opcode, trans_len, 
> > > len,
> > > +prp1, prp2, num_prps);
> > >  
> > >  if (unlikely(!prp1)) {
> > >  trace_nvme_dev_err_invalid_prp();
> > >  return NVME_INVALID_FIELD | NVME_DNR;
> > > -} else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
> > > -   prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) 
> > > {
> > > -qsg->nsg = 0;
> > > +}
> > > +
> > > +if (nvme_addr_is_cmb(n, prp1)) {
> > > +is_cmb = true;
> > > +
> > >  qemu_iovec_init(iov, num_prps);
> > > -qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], 
> > > trans_len);
> > > +
> > > +/*
> > > + * PRPs do not cross page boundaries, so if the start address 
> > > (here,
> > > + * prp1) is within the CMB, it cannot cross outside the 
> > > controller
> > > + * memory buffer range. This is ensured by
> > > + *
> > > + *   len = n->page_size - (addr % n->page_size)
> > > + *
> > > + * Thus, we can directly add to the iovec without risking an out 
> > > of
> > > + * bounds access. This also holds for the remaining 
> > > qemu_iovec_add
> > > + * calls.
> > > + */
> > > +qemu_iovec_add(iov, nvme_addr_to_cmb(n, prp1), trans_len);
> > >  } else {
> > >  pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
> > >  qemu_sglist_add(qsg, prp1, trans_len);
> > >  }
> > > +
> > >  len -= trans_len;
> > >  if (len) {
> > >  if (unlikely(!prp2)) {
> > >  

Re: [PATCH v5 16/26] nvme: refactor prp mapping

2020-03-16 Thread Klaus Birkelund Jensen
On Feb 12 13:44, Maxim Levitsky wrote:
> On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> > Refactor nvme_map_prp and allow PRPs to be located in the CMB. The logic
> > ensures that if some of the PRP is in the CMB, all of it must be located
> > there, as per the specification.
> 
> To be honest, this looks less like refactoring and more like a bugfix
> (the old code just assumed that if the first PRP entry is in the CMB, the rest
> are too).

I split it out into a separate bugfix patch.

> > 
> > Also combine nvme_dma_{read,write}_prp into a single nvme_dma_prp that
> > takes an additional DMADirection parameter.
> 
> To be honest, 'nvme_dma_prp' was not a clear function name to me at first
> glance. Could you rename it to nvme_dma_prp_rw or something similar?
> (Although even that does not quite convey that it reads/writes data to/from
> the guest memory areas defined by the PRP list.)
> Also, could you split this change into a new patch?
> 

Splitting this into a new patch.

> > 
> > Signed-off-by: Klaus Jensen 
> > Signed-off-by: Klaus Jensen 
> Now you even use both of your addresses :-)
> 
> > ---
> >  hw/block/nvme.c   | 245 +++---
> >  hw/block/nvme.h   |   2 +-
> >  hw/block/trace-events |   1 +
> >  include/block/nvme.h  |   1 +
> >  4 files changed, 160 insertions(+), 89 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index 4acfc85b56a2..334265efb21e 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -58,6 +58,11 @@
> >  
> >  static void nvme_process_sq(void *opaque);
> >  
> > +static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
> > +{
> > +return &n->cmbuf[addr - n->ctrl_mem.addr];
> > +}
> 
> To my taste, I would put this together with the patch that added
> nvme_addr_is_cmb. I know that some people are against this, citing the rule
> that code should be used in the same patch that adds it. Your call.
> 
> Regardless, I also prefer to put refactoring patches first in the series.
> 
> > +
> >  static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
> >  {
> >  hwaddr low = n->ctrl_mem.addr;
> > @@ -152,138 +157,187 @@ static void nvme_irq_deassert(NvmeCtrl *n, 
> > NvmeCQueue *cq)
> >  }
> >  }
> >  
> > -static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t 
> > prp1,
> > - uint64_t prp2, uint32_t len, NvmeCtrl *n)
> > +static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector 
> > *iov,
> > +uint64_t prp1, uint64_t prp2, uint32_t len, NvmeRequest *req)
> 
> The split-line alignment is off (it was correct before).
> Also, while refactoring, it would be great to add some documentation to this
> and a few more functions, since it is not immediately clear what this does.
> 
> 
> >  {
> >  hwaddr trans_len = n->page_size - (prp1 % n->page_size);
> >  trans_len = MIN(len, trans_len);
> >  int num_prps = (len >> n->page_bits) + 1;
> > +uint16_t status = NVME_SUCCESS;
> > +bool is_cmb = false;
> > +bool prp_list_in_cmb = false;
> > +
> > +trace_nvme_dev_map_prp(nvme_cid(req), req->cmd.opcode, trans_len, len,
> > +prp1, prp2, num_prps);
> >  
> >  if (unlikely(!prp1)) {
> >  trace_nvme_dev_err_invalid_prp();
> >  return NVME_INVALID_FIELD | NVME_DNR;
> > -} else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
> > -   prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
> > -qsg->nsg = 0;
> > +}
> > +
> > +if (nvme_addr_is_cmb(n, prp1)) {
> > +is_cmb = true;
> > +
> >  qemu_iovec_init(iov, num_prps);
> > -qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], 
> > trans_len);
> > +
> > +/*
> > + * PRPs do not cross page boundaries, so if the start address 
> > (here,
> > + * prp1) is within the CMB, it cannot cross outside the controller
> > + * memory buffer range. This is ensured by
> > + *
> > + *   len = n->page_size - (addr % n->page_size)
> > + *
> > + * Thus, we can directly add to the iovec without risking an out of
> > + * bounds access. This also holds for the remaining qemu_iovec_add
> > + * calls.
> > + */
> > +qemu_iovec_add(iov, nvme_addr_to_cmb(n, prp1), trans_len);
> >  } else {
> >  pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
> >  qemu_sglist_add(qsg, prp1, trans_len);
> >  }
> > +
> >  len -= trans_len;
> >  if (len) {
> >  if (unlikely(!prp2)) {
> >  trace_nvme_dev_err_invalid_prp2_missing();
> > +status = NVME_INVALID_FIELD | NVME_DNR;
> >  goto unmap;
> >  }
> > +
> >  if (len > n->page_size) {
> >  uint64_t prp_list[n->max_prp_ents];
> >  uint32_t nents, prp_trans;
> >  int i = 0;
> >  
> > +if (nvme_addr_is_cmb(n, prp2)) {
> > +prp_list_in_cmb 

Re: [PATCH v5 16/26] nvme: refactor prp mapping

2020-02-12 Thread Maxim Levitsky
On Tue, 2020-02-04 at 10:51 +0100, Klaus Jensen wrote:
> Refactor nvme_map_prp and allow PRPs to be located in the CMB. The logic
> ensures that if some of the PRP is in the CMB, all of it must be located
> there, as per the specification.

To be honest, this looks less like refactoring and more like a bugfix
(the old code just assumed that if the first PRP entry is in the CMB, the rest
are too).
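
As a minimal, self-contained sketch of the check the spec requires (not the QEMU
code; the names and the flat prps[] array are made up for illustration), every
entry has to be compared against the region of the first one, rather than only
looking at prp1:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Illustrative stand-ins for n->ctrl_mem.addr and the CMB size. */
static bool addr_in_cmb(uint64_t addr, uint64_t cmb_base, uint64_t cmb_size)
{
    return addr >= cmb_base && addr < cmb_base + cmb_size;
}

/*
 * True only if every PRP entry lives in the same region (CMB or host memory)
 * as the first one. A mixed list is exactly the case the old code silently
 * accepted, because it only ever looked at prps[0].
 */
static bool prps_all_in_same_region(const uint64_t *prps, size_t nents,
                                    uint64_t cmb_base, uint64_t cmb_size)
{
    bool first_in_cmb = addr_in_cmb(prps[0], cmb_base, cmb_size);

    for (size_t i = 1; i < nents; i++) {
        if (addr_in_cmb(prps[i], cmb_base, cmb_size) != first_in_cmb) {
            return false;
        }
    }
    return true;
}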
> 
> Also combine nvme_dma_{read,write}_prp into a single nvme_dma_prp that
> takes an additional DMADirection parameter.

To be honest, 'nvme_dma_prp' was not a clear function name to me at first glance.
Could you rename it to nvme_dma_prp_rw or something similar? (Although even that
does not quite convey that it reads/writes data to/from the guest memory areas
defined by the PRP list.)
Also, could you split this change into a new patch?
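
(Purely as an illustration of the read/write duality, and not the code in this
patch: the "one helper plus a direction parameter" shape described in the commit
message boils down to something like the toy below, with plain memcpy standing
in for the real PRP-driven DMA and a local enum standing in for QEMU's
DMADirection.)

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Local stand-in for QEMU's DMADirection, for illustration only. */
typedef enum {
    DIR_TO_DEVICE,    /* guest memory -> controller, i.e. an NVMe write command */
    DIR_FROM_DEVICE,  /* controller -> guest memory, i.e. an NVMe read command  */
} DmaDir;

/*
 * Toy combined helper: one function, with the direction parameter selecting
 * which way the bytes flow. The real function walks the PRP list instead of
 * taking a flat guest buffer.
 */
static void dma_rw_sketch(uint8_t *guest, uint8_t *ctrl, size_t len, DmaDir dir)
{
    if (dir == DIR_FROM_DEVICE) {
        memcpy(guest, ctrl, len);
    } else {
        memcpy(ctrl, guest, len);
    }
}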

> 
> Signed-off-by: Klaus Jensen 
> Signed-off-by: Klaus Jensen 
Now you even use both of your addresses :-)

> ---
>  hw/block/nvme.c   | 245 +++---
>  hw/block/nvme.h   |   2 +-
>  hw/block/trace-events |   1 +
>  include/block/nvme.h  |   1 +
>  4 files changed, 160 insertions(+), 89 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 4acfc85b56a2..334265efb21e 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -58,6 +58,11 @@
>  
>  static void nvme_process_sq(void *opaque);
>  
> +static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
> +{
> +return &n->cmbuf[addr - n->ctrl_mem.addr];
> +}

To my taste, I would put this together with the patch that added nvme_addr_is_cmb.
I know that some people are against this, citing the rule that code should be used
in the same patch that adds it. Your call.

Regardless, I also prefer to put refactoring patches first in the series.

> +
>  static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
>  {
>  hwaddr low = n->ctrl_mem.addr;
> @@ -152,138 +157,187 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue 
> *cq)
>  }
>  }
>  
> -static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t 
> prp1,
> - uint64_t prp2, uint32_t len, NvmeCtrl *n)
> +static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
> +uint64_t prp1, uint64_t prp2, uint32_t len, NvmeRequest *req)

The split-line alignment is off (it was correct before).
Also, while refactoring, it would be great to add some documentation to this and
a few more functions, since it is not immediately clear what this does.
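
For example, the kind of thing worth spelling out for the first few lines below
(the numbers here are purely illustrative and assume 4 KiB pages; this is a
stand-alone toy, not the QEMU code):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* 4 KiB pages; a 10 KiB transfer whose first PRP starts 256 bytes into
     * a page. */
    const uint64_t page_size = 4096;
    const unsigned page_bits = 12;
    const uint64_t prp1 = 0x100000100;   /* 256 bytes past a page boundary */
    const uint32_t len  = 10 * 1024;

    /* Bytes covered by prp1: from the offset up to the end of its page. */
    uint64_t trans_len = page_size - (prp1 % page_size);
    if (trans_len > len) {
        trans_len = len;
    }

    /* Estimate used to size the QEMUSGList/QEMUIOVector. */
    int num_prps = (len >> page_bits) + 1;

    assert(trans_len == 3840);
    assert(num_prps == 3);
    return 0;
}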


>  {
>  hwaddr trans_len = n->page_size - (prp1 % n->page_size);
>  trans_len = MIN(len, trans_len);
>  int num_prps = (len >> n->page_bits) + 1;
> +uint16_t status = NVME_SUCCESS;
> +bool is_cmb = false;
> +bool prp_list_in_cmb = false;
> +
> +trace_nvme_dev_map_prp(nvme_cid(req), req->cmd.opcode, trans_len, len,
> +prp1, prp2, num_prps);
>  
>  if (unlikely(!prp1)) {
>  trace_nvme_dev_err_invalid_prp();
>  return NVME_INVALID_FIELD | NVME_DNR;
> -} else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
> -   prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
> -qsg->nsg = 0;
> +}
> +
> +if (nvme_addr_is_cmb(n, prp1)) {
> +is_cmb = true;
> +
>  qemu_iovec_init(iov, num_prps);
> -qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], 
> trans_len);
> +
> +/*
> + * PRPs do not cross page boundaries, so if the start address (here,
> + * prp1) is within the CMB, it cannot cross outside the controller
> + * memory buffer range. This is ensured by
> + *
> + *   len = n->page_size - (addr % n->page_size)
> + *
> + * Thus, we can directly add to the iovec without risking an out of
> + * bounds access. This also holds for the remaining qemu_iovec_add
> + * calls.
> + */
> +qemu_iovec_add(iov, nvme_addr_to_cmb(n, prp1), trans_len);
>  } else {
>  pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
>  qemu_sglist_add(qsg, prp1, trans_len);
>  }
> +
>  len -= trans_len;
>  if (len) {
>  if (unlikely(!prp2)) {
>  trace_nvme_dev_err_invalid_prp2_missing();
> +status = NVME_INVALID_FIELD | NVME_DNR;
>  goto unmap;
>  }
> +
>  if (len > n->page_size) {
>  uint64_t prp_list[n->max_prp_ents];
>  uint32_t nents, prp_trans;
>  int i = 0;
>  
> +if (nvme_addr_is_cmb(n, prp2)) {
> +prp_list_in_cmb = true;
> +}
> +
>  nents = (len + n->page_size - 1) >> n->page_bits;
>  prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
> -nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
> +nvme_addr_read(n, prp2, (void *) prp_list, prp_trans);
>  while (len != 0) {
>  uint64_t prp_ent = 

[PATCH v5 16/26] nvme: refactor prp mapping

2020-02-04 Thread Klaus Jensen
Refactor nvme_map_prp and allow PRPs to be located in the CMB. The logic
ensures that if some of the PRP is in the CMB, all of it must be located
there, as per the specification.

Also combine nvme_dma_{read,write}_prp into a single nvme_dma_prp that
takes an additional DMADirection parameter.

Signed-off-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
---
 hw/block/nvme.c   | 245 +++---
 hw/block/nvme.h   |   2 +-
 hw/block/trace-events |   1 +
 include/block/nvme.h  |   1 +
 4 files changed, 160 insertions(+), 89 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 4acfc85b56a2..334265efb21e 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -58,6 +58,11 @@
 
 static void nvme_process_sq(void *opaque);
 
+static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
+{
+return &n->cmbuf[addr - n->ctrl_mem.addr];
+}
+
 static inline bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
 {
 hwaddr low = n->ctrl_mem.addr;
@@ -152,138 +157,187 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue 
*cq)
 }
 }
 
-static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
- uint64_t prp2, uint32_t len, NvmeCtrl *n)
+static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
+uint64_t prp1, uint64_t prp2, uint32_t len, NvmeRequest *req)
 {
 hwaddr trans_len = n->page_size - (prp1 % n->page_size);
 trans_len = MIN(len, trans_len);
 int num_prps = (len >> n->page_bits) + 1;
+uint16_t status = NVME_SUCCESS;
+bool is_cmb = false;
+bool prp_list_in_cmb = false;
+
+trace_nvme_dev_map_prp(nvme_cid(req), req->cmd.opcode, trans_len, len,
+prp1, prp2, num_prps);
 
 if (unlikely(!prp1)) {
 trace_nvme_dev_err_invalid_prp();
 return NVME_INVALID_FIELD | NVME_DNR;
-} else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
-   prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
-qsg->nsg = 0;
+}
+
+if (nvme_addr_is_cmb(n, prp1)) {
+is_cmb = true;
+
 qemu_iovec_init(iov, num_prps);
-qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], 
trans_len);
+
+/*
+ * PRPs do not cross page boundaries, so if the start address (here,
+ * prp1) is within the CMB, it cannot cross outside the controller
+ * memory buffer range. This is ensured by
+ *
+ *   len = n->page_size - (addr % n->page_size)
+ *
+ * Thus, we can directly add to the iovec without risking an out of
+ * bounds access. This also holds for the remaining qemu_iovec_add
+ * calls.
+ */
+qemu_iovec_add(iov, nvme_addr_to_cmb(n, prp1), trans_len);
 } else {
 pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
 qemu_sglist_add(qsg, prp1, trans_len);
 }
+
 len -= trans_len;
 if (len) {
 if (unlikely(!prp2)) {
 trace_nvme_dev_err_invalid_prp2_missing();
+status = NVME_INVALID_FIELD | NVME_DNR;
 goto unmap;
 }
+
 if (len > n->page_size) {
 uint64_t prp_list[n->max_prp_ents];
 uint32_t nents, prp_trans;
 int i = 0;
 
+if (nvme_addr_is_cmb(n, prp2)) {
+prp_list_in_cmb = true;
+}
+
 nents = (len + n->page_size - 1) >> n->page_bits;
 prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
-nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
+nvme_addr_read(n, prp2, (void *) prp_list, prp_trans);
 while (len != 0) {
 uint64_t prp_ent = le64_to_cpu(prp_list[i]);
 
 if (i == n->max_prp_ents - 1 && len > n->page_size) {
 if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
 trace_nvme_dev_err_invalid_prplist_ent(prp_ent);
+status = NVME_INVALID_FIELD | NVME_DNR;
+goto unmap;
+}
+
+if (prp_list_in_cmb != nvme_addr_is_cmb(n, prp_ent)) {
+status = NVME_INVALID_USE_OF_CMB | NVME_DNR;
 goto unmap;
 }
 
 i = 0;
 nents = (len + n->page_size - 1) >> n->page_bits;
 prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
-nvme_addr_read(n, prp_ent, (void *)prp_list,
-prp_trans);
+nvme_addr_read(n, prp_ent, (void *) prp_list, prp_trans);
 prp_ent = le64_to_cpu(prp_list[i]);
 }
 
 if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
 trace_nvme_dev_err_invalid_prplist_ent(prp_ent);
+status = NVME_INVALID_FIELD | NVME_DNR;
+goto unmap;
+}