[Qemu-block] [PATCH] qemu-block: add support HMB with feature commands.

2017-10-21 Thread Minwoo Im
Add support for HMB (Host Memory Buffer) with feature commands (Get Features, Set
Features).
nvme-4.14 tree supports HMB features.
This patch will make nvme controller to return 32MiB preferred size of HMB to 
host via identify command.
Set Feature, Get Feature implemented for HMB.

Signed-off-by: Minwoo Im <minwoo.im@gmail.com>
---
 hw/block/nvme.c | 35 +++
 hw/block/nvme.h | 21 -
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 6071dc1..d351781 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -605,6 +605,23 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
 }
 }
 
+static uint32_t nvme_get_feature_hmb(NvmeCtrl *n, NvmeCmd *cmd)
+{
+uint32_t result = n->hmb_flag.flag;
+uint64_t prp1 = le64_to_cpu(cmd->prp1);
+uint64_t prp2 = le64_to_cpu(cmd->prp2);
+NvmeHmbAttr attr = {0, };
+
+attr.hsize = cpu_to_le32(n->hmb_attr.hsize);
+attr.hmdlal = cpu_to_le32(n->hmb_attr.hmdlal);
+attr.hmdlau = cpu_to_le32(n->hmb_attr.hmdlau);
+attr.hmdlec = cpu_to_le32(n->hmb_attr.hmdlec);
+
+nvme_dma_read_prp(n, (uint8_t *)&attr, sizeof(attr), prp1, prp2);
+
+return result;
+}
+
 static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
 uint32_t dw10 = le32_to_cpu(cmd->cdw10);
@@ -617,6 +634,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, 
NvmeRequest *req)
 case NVME_NUMBER_OF_QUEUES:
 result = cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 
16));
 break;
+case NVME_HOST_MEMORY_BUFFER:
+result = nvme_get_feature_hmb(n, cmd);
+break;
 default:
 return NVME_INVALID_FIELD | NVME_DNR;
 }
@@ -625,6 +645,16 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
*cmd, NvmeRequest *req)
 return NVME_SUCCESS;
 }
 
+static void nvme_set_feature_hmb(NvmeCtrl *n, NvmeCmd *cmd)
+{
+n->hmb_flag.flag = le32_to_cpu(cmd->cdw11);
+
+n->hmb_attr.hsize = le32_to_cpu(cmd->cdw12);
+n->hmb_attr.hmdlal = le32_to_cpu(cmd->cdw13);
+n->hmb_attr.hmdlau = le32_to_cpu(cmd->cdw14);
+n->hmb_attr.hmdlec = le32_to_cpu(cmd->cdw15);
+}
+
 static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
 uint32_t dw10 = le32_to_cpu(cmd->cdw10);
@@ -638,6 +668,9 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, 
NvmeRequest *req)
 req->cqe.result =
 cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
 break;
+case NVME_HOST_MEMORY_BUFFER:
+nvme_set_feature_hmb(n, cmd);
+break;
 default:
 return NVME_INVALID_FIELD | NVME_DNR;
 }
@@ -985,6 +1018,8 @@ static int nvme_init(PCIDevice *pci_dev)
 id->oacs = cpu_to_le16(0);
 id->frmw = 7 << 1;
 id->lpa = 1 << 0;
+id->hmpre = 0x2000;
+id->hmmin = 0x0;
 id->sqes = (0x6 << 4) | 0x6;
 id->cqes = (0x4 << 4) | 0x4;
 id->nn = cpu_to_le32(n->num_namespaces);
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 6aab338..fab748b 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -552,7 +552,10 @@ typedef struct NvmeIdCtrl {
 uint8_t lpa;
 uint8_t elpe;
 uint8_t npss;
-uint8_t rsvd511[248];
+uint8_t rsvd271[8];
+uint32_thmpre;
+uint32_thmmin;
+uint8_t rsvd511[232];
 uint8_t sqes;
 uint8_t cqes;
 uint16_trsvd515;
@@ -623,9 +626,22 @@ enum NvmeFeatureIds {
 NVME_INTERRUPT_VECTOR_CONF  = 0x9,
 NVME_WRITE_ATOMICITY= 0xa,
 NVME_ASYNCHRONOUS_EVENT_CONF= 0xb,
+NVME_HOST_MEMORY_BUFFER = 0xd,
 NVME_SOFTWARE_PROGRESS_MARKER   = 0x80
 };
 
+typedef struct NvmeHmbFlag {
+uint32_tflag;
+} NvmeHmbFlag;
+
+typedef struct NvmeHmbAttr {
+uint32_thsize;
+uint32_thmdlal;
+uint32_thmdlau;
+uint32_thmdlec;
+uint8_t rsvd4095[4080];
+} NvmeHmbAttr;
+
 typedef struct NvmeRangeType {
 uint8_t type;
 uint8_t attributes;
@@ -776,6 +792,9 @@ typedef struct NvmeCtrl {
 uint32_tcmbloc;
 uint8_t *cmbuf;
 
+NvmeHmbFlag hmb_flag;
+NvmeHmbAttr hmb_attr;
+
 char*serial;
 NvmeNamespace   *namespaces;
 NvmeSQueue  **sq;
-- 
2.7.4




Re: [Qemu-block] [PATCH] qemu-block: add support HMB with feature commands.

2017-10-28 Thread Minwoo Im
I'll send a patch only when there is a clear real use case.
Thanks for your reply.

On Mon, Oct 23, 2017 at 11:30 PM, Keith Busch <keith.bu...@intel.com> wrote:
> On Sat, Oct 21, 2017 at 03:54:16PM +0900, Minwoo Im wrote:
>> Add support HMB(Host Memory Block) with feature commands(Get Feature, Set 
>> Feature).
>> nvme-4.14 tree supports HMB features.
>> This patch will make nvme controller to return 32MiB preferred size of HMB 
>> to host via identify command.
>> Set Feature, Get Feature implemented for HMB.
>>
>> Signed-off-by: Minwoo Im <minwoo.im@gmail.com>
>
> Nak; this patch doesn't have a use for host memory. It's just advertising
> the need without ever accessing it.



Re: [Qemu-block] [PATCH] qemu-block: add support HMB with feature commands.

2017-10-28 Thread Minwoo Im
On Tue, Oct 24, 2017 at 1:18 AM, Stefan Hajnoczi <stefa...@gmail.com> wrote:
> On Sat, Oct 21, 2017 at 03:54:16PM +0900, Minwoo Im wrote:
>> Add support HMB(Host Memory Block) with feature commands(Get Feature, Set 
>> Feature).
>> nvme-4.14 tree supports HMB features.
>> This patch will make nvme controller to return 32MiB preferred size of HMB 
>> to host via identify command.
>> Set Feature, Get Feature implemented for HMB.
>>
>> Signed-off-by: Minwoo Im <minwoo.im@gmail.com>
>> ---
>>  hw/block/nvme.c | 35 +++
>>  hw/block/nvme.h | 21 -
>>  2 files changed, 55 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
>> index 6071dc1..d351781 100644
>> --- a/hw/block/nvme.c
>> +++ b/hw/block/nvme.c
>> @@ -605,6 +605,23 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
>>  }
>>  }
>>
>> +static uint32_t nvme_get_feature_hmb(NvmeCtrl *n, NvmeCmd *cmd)
>> +{
>> +uint32_t result = n->hmb_flag.flag;
>> +uint64_t prp1 = le64_to_cpu(cmd->prp1);
>> +uint64_t prp2 = le64_to_cpu(cmd->prp2);
>> +NvmeHmbAttr attr = {0, };
>> +
>> +attr.hsize = cpu_to_le32(n->hmb_attr.hsize);
>> +attr.hmdlal = cpu_to_le32(n->hmb_attr.hmdlal);
>> +attr.hmdlau = cpu_to_le32(n->hmb_attr.hmdlau);
>> +attr.hmdlec = cpu_to_le32(n->hmb_attr.hmdlec);
>> +
>> +nvme_dma_read_prp(n, (uint8_t *)&attr, sizeof(attr), prp1, prp2);
>
> This read can fail if prp1/prp2 are invalid.  How should the error be
> reported?

If either or both of prp1 and prp2 are invalid, there is no error handling in this code.
Agreed, it needs to be implemented if this code is sent as a patch.

>
>> +
>> +return result;
>> +}
>> +
>>  static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest 
>> *req)
>>  {
>>  uint32_t dw10 = le32_to_cpu(cmd->cdw10);
>> @@ -617,6 +634,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
>> *cmd, NvmeRequest *req)
>>  case NVME_NUMBER_OF_QUEUES:
>>  result = cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 
>> 16));
>>  break;
>> +case NVME_HOST_MEMORY_BUFFER:
>> +result = nvme_get_feature_hmb(n, cmd);
>> +break;
>>  default:
>>  return NVME_INVALID_FIELD | NVME_DNR;
>>  }
>> @@ -625,6 +645,16 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd 
>> *cmd, NvmeRequest *req)
>>  return NVME_SUCCESS;
>>  }
>>
>> +static void nvme_set_feature_hmb(NvmeCtrl *n, NvmeCmd *cmd)
>> +{
>> +n->hmb_flag.flag = le32_to_cpu(cmd->cdw11);
>> +
>> +n->hmb_attr.hsize = le32_to_cpu(cmd->cdw12);
>> +n->hmb_attr.hmdlal = le32_to_cpu(cmd->cdw13);
>> +n->hmb_attr.hmdlau = le32_to_cpu(cmd->cdw14);
>> +n->hmb_attr.hmdlec = le32_to_cpu(cmd->cdw15);
>> +}
>> +
>>  static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest 
>> *req)
>>  {
>>  uint32_t dw10 = le32_to_cpu(cmd->cdw10);
>> @@ -638,6 +668,9 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd 
>> *cmd, NvmeRequest *req)
>>  req->cqe.result =
>>  cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
>>  break;
>> +case NVME_HOST_MEMORY_BUFFER:
>> +nvme_set_feature_hmb(n, cmd);
>> +break;
>>  default:
>>  return NVME_INVALID_FIELD | NVME_DNR;
>>  }
>> @@ -985,6 +1018,8 @@ static int nvme_init(PCIDevice *pci_dev)
>>  id->oacs = cpu_to_le16(0);
>>  id->frmw = 7 << 1;
>>  id->lpa = 1 << 0;
>> +id->hmpre = 0x2000;
>
> Missing cpu_to_le32()?

agreed hmpre should be set with le32 instead of raw value.

>
>> +id->hmmin = 0x0;
>>  id->sqes = (0x6 << 4) | 0x6;
>>  id->cqes = (0x4 << 4) | 0x4;
>>  id->nn = cpu_to_le32(n->num_namespaces);
>> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
>> index 6aab338..fab748b 100644
>> --- a/hw/block/nvme.h
>> +++ b/hw/block/nvme.h
>> @@ -552,7 +552,10 @@ typedef struct NvmeIdCtrl {
>>  uint8_t lpa;
>>  uint8_t elpe;
>>  uint8_t npss;
>> -uint8_t rsvd511[248];
>> +uint8_t rsvd271[8];
>> +uint32_thmpre;
>> +uint32_thmmin;
>> +uint8_t rsvd511[232];
>>  

[PATCH] nvme: Print 'cqid' for nvme_del_cq

2020-03-24 Thread Minwoo Im
The given argument for this trace should be cqid, not sqid.

Signed-off-by: Minwoo Im 
---
 hw/block/trace-events | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/block/trace-events b/hw/block/trace-events
index f78939fa9da1..bf6d11b58b85 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -37,7 +37,7 @@ nvme_rw(const char *verb, uint32_t blk_count, uint64_t 
byte_count, uint64_t lba)
 nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, 
uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", 
cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
 nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, 
uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", 
cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
 nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
-nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16""
+nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16""
 nvme_identify_ctrl(void) "identify controller"
 nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
 nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
-- 
2.17.1




Re: [PATCH 04/16] hw/block/nvme: remove redundant has_sg member

2020-07-29 Thread Minwoo Im
> -Original Message-
> From: Qemu-devel  On
> Behalf Of Klaus Jensen
> Sent: Thursday, July 30, 2020 3:29 AM
> To: Minwoo Im 
> Cc: Kevin Wolf ; qemu-block@nongnu.org; Klaus Jensen
> ; qemu-de...@nongnu.org; Max Reitz ;
> Keith Busch 
> Subject: Re: [PATCH 04/16] hw/block/nvme: remove redundant has_sg member
> 
> On Jul 30 00:29, Minwoo Im wrote:
> > Klaus,
> >
> 
> Hi Minwoo,
> 
> Thanks for the reviews and welcome to the party! :)
> 
> > On 20-07-20 13:37:36, Klaus Jensen wrote:
> > > From: Klaus Jensen 
> > >
> > > Remove the has_sg member from NvmeRequest since it's redundant.
> > >
> > > Also, make sure the request iov is destroyed at completion time.
> > >
> > > Signed-off-by: Klaus Jensen 
> > > Reviewed-by: Maxim Levitsky 
> > > ---
> > >  hw/block/nvme.c | 11 ++-
> > >  hw/block/nvme.h |  1 -
> > >  2 files changed, 6 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index cb236d1c8c46..6a1a1626b87b 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -548,16 +548,20 @@ static void nvme_rw_cb(void *opaque, int ret)
> > >  block_acct_failed(blk_get_stats(n->conf.blk), &req->acct);
> > >  req->status = NVME_INTERNAL_DEV_ERROR;
> > >  }
> > > -if (req->has_sg) {
> > > +
> > > +if (req->qsg.nalloc) {
> >
> > Personally, I prefer has_xxx or is_xxx to check whether the request is
> > based on sg or iov as an inline function, but 'nalloc' is also fine to
> > figure out the meaning of purpose here.
> >
> 
> What I really want to do is get rid of this duality with qsg and iovs at
> some point. I kinda wanna get rid of the dma helpers and the qsg
> entirely and do the DMA handling directly.
> 
> Maybe an `int flags` member in NvmeRequest would be better for this,
> such as NVME_REQ_DMA etc.

That looks better, but it looks like this is out of scope of this series.
Anyway, Please take my tag for this patch.

Reviewed-by: Minwoo Im 

> 
> > >  qemu_sglist_destroy(&req->qsg);
> > >  }
> > > +if (req->iov.nalloc) {
> > > +qemu_iovec_destroy(&req->iov);
> > > +}
> > > +
> >
> > Maybe this can be in a separated commit?
> >
> 
> Yeah. I guess whenever a commit message includes "Also, ..." you really
> should factor the change out ;)
> 
> I'll split it.
> 
> > Otherwise, It looks good to me.
> >
> > Thanks,
> >
> 
> Does that mean I can add your R-b? :)




Re: [PATCH v2 2/4] hw/block/nvme: support multiple namespaces

2020-08-08 Thread Minwoo Im
On 06/29 23:40, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> This adds support for multiple namespaces by introducing a new 'nvme-ns'
> device model. The nvme device creates a bus named from the device name
> ('id'). The nvme-ns devices then connect to this and registers
> themselves with the nvme device.
> 
> This changes how an nvme device is created. Example with two namespaces:
> 
>   -drive file=nvme0n1.img,if=none,id=disk1
>   -drive file=nvme0n2.img,if=none,id=disk2
>   -device nvme,serial=deadbeef,id=nvme0
>   -device nvme-ns,drive=disk1,bus=nvme0,nsid=1
>   -device nvme-ns,drive=disk2,bus=nvme0,nsid=2
> 
> The drive property is kept on the nvme device to keep the change
> backward compatible, but the property is now optional. Specifying a
> drive for the nvme device will always create the namespace with nsid 1.
> 
> Signed-off-by: Klaus Jensen 
> Signed-off-by: Klaus Jensen 
> Reviewed-by: Keith Busch 

Reviewed-by: Minwoo Im 



Re: [PATCH v2 15/16] hw/block/nvme: use preallocated qsg/iov in nvme_dma_prp

2020-07-30 Thread Minwoo Im
On 20-07-30 00:06:37, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Since clean up of the request qsg/iov is now always done post-use, there
> is no need to use a stack-allocated qsg/iov in nvme_dma_prp.
> 
> Signed-off-by: Klaus Jensen 
> Acked-by: Keith Busch 
> Reviewed-by: Maxim Levitsky 

Reviewed-by: Minwoo Im 



Re: [PATCH v2 14/16] hw/block/nvme: consolidate qsg/iov clearing

2020-07-30 Thread Minwoo Im
> Hi Minwoo,
>
> The is that on 'exit' we release request resources (like the qsg and
> iov). On 'clear' we initialize the request by clearing the struct. I
> guess I could call it nvme_req_init instead maybe, but really - it is
> clearing it.

Yeah, I just wanted to make it clear to understand myself here.

Reviewed-by: Minwoo Im 



Re: [PATCH v2 07/16] hw/block/nvme: add tracing to nvme_map_prp

2020-07-30 Thread Minwoo Im
On Thu, Jul 30, 2020 at 7:06 AM Klaus Jensen  wrote:
>
> From: Klaus Jensen 
>
> Add tracing to nvme_map_prp.
>
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c   | 2 ++
>  hw/block/trace-events | 1 +
>  2 files changed, 3 insertions(+)
>
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 571635ebe9f9..952afbb05175 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -274,6 +274,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, 
> QEMUIOVector *iov, uint64_t prp1,
>  int num_prps = (len >> n->page_bits) + 1;
>  uint16_t status;
>
> +trace_pci_nvme_map_prp(trans_len, len, prp1, prp2, num_prps);

Hmm.. Okay with this.  But once QEMUSGList and QEMUIOVector instances are coming
into the NvmeRequest, we just can pass the NvmeRequest instance here
and print the cid as well
later :)

Reviewed-by: Minwoo Im 

Thanks!

> +
>  if (unlikely(!prp1)) {
>  trace_pci_nvme_err_invalid_prp();
>  return NVME_INVALID_FIELD | NVME_DNR;
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index f3b2d004e078..f20c59a4b542 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -35,6 +35,7 @@ pci_nvme_irq_masked(void) "IRQ is masked"
>  pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" 
> prp2=0x%"PRIx64""
>  pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len 
> %"PRIu64""
>  pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len 
> %"PRIu64""
> +pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t 
> prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 
> 0x%"PRIx64" num_prps %d"
>  pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode) 
> "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8""
>  pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode) "cid 
> %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8""
>  pci_nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, 
> uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
> --
> 2.27.0
>



Re: [PATCH v2 08/16] hw/block/nvme: add request mapping helper

2020-07-30 Thread Minwoo Im
On Thu, Jul 30, 2020 at 7:06 AM Klaus Jensen  wrote:
>
> From: Klaus Jensen 
>
> Introduce the nvme_map helper to remove some noise in the main nvme_rw
> function.
>
> Signed-off-by: Klaus Jensen 
> Reviewed-by: Maxim Levitsky 
> ---
>  hw/block/nvme.c | 13 ++---
>  1 file changed, 10 insertions(+), 3 deletions(-)
>
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 952afbb05175..198a26890e0c 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -415,6 +415,15 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, 
> uint32_t len,
>  return status;
>  }
>
> +static uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeCmd *cmd, size_t len,
> + NvmeRequest *req)
> +{
> +uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> +uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> +
> +return nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, len, n);
> +}

Let's do something for MPTR laster also when we are right in front of that.

Looks good to me.

Reviewed-by: Minwoo Im 

> +
>  static void nvme_post_cqes(void *opaque)
>  {
>  NvmeCQueue *cq = opaque;
> @@ -602,8 +611,6 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
> NvmeCmd *cmd,
>  NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
>  uint32_t nlb  = le32_to_cpu(rw->nlb) + 1;
>  uint64_t slba = le64_to_cpu(rw->slba);
> -uint64_t prp1 = le64_to_cpu(rw->dptr.prp1);
> -uint64_t prp2 = le64_to_cpu(rw->dptr.prp2);
>
>  uint8_t lba_index  = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
>  uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
> @@ -620,7 +627,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
> NvmeCmd *cmd,
>  return NVME_LBA_RANGE | NVME_DNR;
>  }
>
> -if (nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, data_size, n)) {
> +if (nvme_map_dptr(n, cmd, data_size, req)) {
>  block_acct_invalid(blk_get_stats(n->conf.blk), acct);
>  return NVME_INVALID_FIELD | NVME_DNR;
>  }
> --
> 2.27.0
>



Re: [PATCH v2 16/16] hw/block/nvme: remove explicit qsg/iov parameters

2020-07-30 Thread Minwoo Im
On 20-07-30 00:06:38, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Since nvme_map_prp always operate on the request-scoped qsg/iovs, just
> pass a single pointer to the NvmeRequest instead of two for each of the
> qsg and iov.
> 
> Suggested-by: Minwoo Im 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 11 +++
>  1 file changed, 7 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 55b1a68ced8c..aea8a8b6946c 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -284,8 +284,8 @@ static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList 
> *qsg, QEMUIOVector *iov,
>  return NVME_SUCCESS;
>  }
>  
> -static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t 
> prp1,
> - uint64_t prp2, uint32_t len, NvmeCtrl *n)
> +static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
> + uint32_t len, NvmeRequest *req)
>  {
>  hwaddr trans_len = n->page_size - (prp1 % n->page_size);
>  trans_len = MIN(len, trans_len);
> @@ -293,6 +293,9 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, 
> QEMUIOVector *iov, uint64_t prp1,
>  uint16_t status;
>  bool prp_list_in_cmb = false;
>  
> +QEMUSGList *qsg = &req->qsg;
> +QEMUIOVector *iov = &req->iov;
> +
>  trace_pci_nvme_map_prp(trans_len, len, prp1, prp2, num_prps);
>  
>  if (unlikely(!prp1)) {
> @@ -386,7 +389,7 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, 
> uint32_t len,
>  {
>  uint16_t status = NVME_SUCCESS;
>  
> -status = nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, len, n);
> +status = nvme_map_prp(n, prp1, prp2, len, req);
>  if (status) {
>  return status;
>  }
> @@ -431,7 +434,7 @@ static uint16_t nvme_map_dptr(NvmeCtrl *n, size_t len, 
> NvmeRequest *req)
>  uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
>  uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
>  
> -return nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, len, n);
> +return nvme_map_prp(n, prp1, prp2, len, req);
>  }
>  
>  static void nvme_post_cqes(void *opaque)


This looks better to have qsg and iov in the NvmeRequest.

Reviewed-by: Minwoo Im 



Re: [PATCH v2 14/16] hw/block/nvme: consolidate qsg/iov clearing

2020-07-30 Thread Minwoo Im
On 20-07-30 00:06:36, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Always destroy the request qsg/iov at the end of request use.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 52 -
>  1 file changed, 21 insertions(+), 31 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 3d7275eae369..045dd55376a5 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -217,6 +217,17 @@ static void nvme_req_clear(NvmeRequest *req)
>  memset(&req->cqe, 0x0, sizeof(req->cqe));
>  }
>  
> +static void nvme_req_exit(NvmeRequest *req)
> +{
> +if (req->qsg.sg) {
> +qemu_sglist_destroy(&req->qsg);
> +}
> +
> +if (req->iov.iov) {
> +qemu_iovec_destroy(&req->iov);
> +}
> +}
> +

Klaus,

What is differences between 'clear' and 'exit' from the request
perspective?

Thanks,



Re: [PATCH v2 04/16] hw/block/nvme: remove redundant has_sg member

2020-07-30 Thread Minwoo Im
On Thu, Jul 30, 2020 at 7:06 AM Klaus Jensen  wrote:
>
> From: Klaus Jensen 
>
> Remove the has_sg member from NvmeRequest since it's redundant.
>
> Signed-off-by: Klaus Jensen 

Looks better than the previous one to me.

Reviewed-by: Minwoo Im 



Re: [PATCH v2 05/16] hw/block/nvme: destroy request iov before reuse

2020-07-30 Thread Minwoo Im
On Thu, Jul 30, 2020 at 7:06 AM Klaus Jensen  wrote:
>
> From: Klaus Jensen 
>
> Make sure the request iov is destroyed before reuse; fixing a memory
> leak.
>
> Signed-off-by: Klaus Jensen 

Looks good to me and Thanks for splitting this up.

Reviewed-by: Minwoo Im 



Re: [PATCH v2 01/16] hw/block/nvme: memset preallocated requests structures

2020-07-30 Thread Minwoo Im
On Thu, Jul 30, 2020 at 7:06 AM Klaus Jensen  wrote:
>
> From: Klaus Jensen 
>
> This is preparatory to subsequent patches that change how QSGs/IOVs are
> handled. It is important that the qsg and iov members of the NvmeRequest
> are initially zeroed.
>
> Signed-off-by: Klaus Jensen 
> Reviewed-by: Maxim Levitsky 

Reviewed-by: Minwoo Im 



Re: [PATCH 11/16] hw/block/nvme: be consistent about zeros vs zeroes

2020-07-29 Thread Minwoo Im
Reviewed-by: Minwoo Im 

Thanks,



Re: [PATCH 10/16] hw/block/nvme: add check for mdts

2020-07-29 Thread Minwoo Im
On 20-07-20 13:37:42, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add 'mdts' device parameter to control the Maximum Data Transfer Size of
> the controller and check that it is respected.
> 
> Signed-off-by: Klaus Jensen 
> Reviewed-by: Maxim Levitsky 
> ---
>  hw/block/nvme.c   | 32 ++--
>  hw/block/nvme.h   |  1 +
>  hw/block/trace-events |  1 +
>  3 files changed, 32 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 35bc1a7b7e21..10fe53873ae9 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -18,9 +18,10 @@
>   * Usage: add options:
>   *  -drive file=,if=none,id=
>   *  -device nvme,drive=,serial=,id=, \
> - *  cmb_size_mb=, \
> + *  [cmb_size_mb=,] \
>   *  [pmrdev=,] \
> - *  max_ioqpairs=
> + *  [max_ioqpairs=,] \
> + *  [mdts=]

Nitpick:
  cmb and ioqpairs-things could be in another thread. :)

>   *
>   * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
>   * offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
> @@ -553,6 +554,17 @@ static void nvme_clear_events(NvmeCtrl *n, uint8_t 
> event_type)
>  }
>  }
>  
> +static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len)
> +{
> +uint8_t mdts = n->params.mdts;
> +
> +if (mdts && len > n->page_size << mdts) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +return NVME_SUCCESS;
> +}
> +
>  static inline uint16_t nvme_check_bounds(NvmeCtrl *n, NvmeNamespace *ns,
>   uint64_t slba, uint32_t nlb)
>  {
> @@ -646,6 +658,13 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
> NvmeCmd *cmd,
>  
>  trace_pci_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba);
>  
> +status = nvme_check_mdts(n, data_size);
> +if (status) {
> +trace_pci_nvme_err_mdts(nvme_cid(req), data_size);
> +block_acct_invalid(blk_get_stats(n->conf.blk), acct);
> +return status;
> +}
> +
>  status = nvme_check_bounds(n, ns, slba, nlb);
>  if (status) {
>  trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
> @@ -938,6 +957,7 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, 
> NvmeRequest *req)
>  uint32_t numdl, numdu;
>  uint64_t off, lpol, lpou;
>  size_t   len;
> +uint16_t status;
>  
>  numdl = (dw10 >> 16);
>  numdu = (dw11 & 0x);
> @@ -953,6 +973,12 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, 
> NvmeRequest *req)
>  
>  trace_pci_nvme_get_log(nvme_cid(req), lid, lsp, rae, len, off);
>  
> +status = nvme_check_mdts(n, len);
> +if (status) {
> +trace_pci_nvme_err_mdts(nvme_cid(req), len);
> +return status;
> +}
> +
>  switch (lid) {
>  case NVME_LOG_ERROR_INFO:
>  return nvme_error_info(n, cmd, rae, len, off, req);
> @@ -2275,6 +2301,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
> *pci_dev)
>  id->ieee[0] = 0x00;
>  id->ieee[1] = 0x02;
>  id->ieee[2] = 0xb3;
> +id->mdts = n->params.mdts;
>  id->ver = cpu_to_le32(NVME_SPEC_VER);
>  id->oacs = cpu_to_le16(0);
>  
> @@ -2394,6 +2421,7 @@ static Property nvme_props[] = {
>  DEFINE_PROP_UINT16("msix_qsize", NvmeCtrl, params.msix_qsize, 65),
>  DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3),
>  DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 
> 64),
> +DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7),
>  DEFINE_PROP_END_OF_LIST(),
>  };
>  
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 5519b5cc7686..137cd8c2bf20 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -11,6 +11,7 @@ typedef struct NvmeParams {
>  uint32_t cmb_size_mb;
>  uint8_t  aerl;
>  uint32_t aer_max_queued;
> +uint8_t  mdts;
>  } NvmeParams;
>  
>  typedef struct NvmeAsyncEvent {
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 6d0cd588c786..5d7d4679650b 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -85,6 +85,7 @@ pci_nvme_mmio_shutdown_set(void) "shutdown bit set"
>  pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
>  
>  # nvme traces for error conditions
> +pci_nvme_err_mdts(uint16_t cid, size_t len) "cid %"PRIu16" len %"PRIu64""
>  pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
>  pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null 
> or not page aligned: 0x%"PRIx64""
>  pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 
> 0x%"PRIx64""
> -- 
> 2.27.0
> 
> 

Reviewed-by: Minwoo Im 

Thanks,



Re: [PATCH 13/16] hw/block/nvme: add a namespace reference in NvmeRequest

2020-07-29 Thread Minwoo Im
On 20-07-20 13:37:45, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Instead of passing around the NvmeNamespace, add it as a member in the
> NvmeRequest structure.
> 
> Signed-off-by: Klaus Jensen 

Reviewed-by: Minwoo Im 



Re: [PATCH 14/16] hw/block/nvme: consolidate qsg/iov clearing

2020-07-29 Thread Minwoo Im
On 20-07-20 13:37:46, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Always destroy the request qsg/iov at the end of request use.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 48 +---
>  1 file changed, 17 insertions(+), 31 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 54cd20f1ce22..b53afdeb3fb6 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -213,6 +213,14 @@ static void nvme_req_clear(NvmeRequest *req)
>  {
>  req->ns = NULL;
>  memset(&req->cqe, 0x0, sizeof(req->cqe));
> +
> +if (req->qsg.sg) {
> +qemu_sglist_destroy(&req->qsg);
> +}
> +
> +if (req->iov.iov) {
> +qemu_iovec_destroy(&req->iov);
> +}

Oh okay.  This looks like update for the previous patch in this series.
And I also agree on starting to make focus on nvme_req_clear() for
wrap-up.

Looks good to me.

Reviewed-by: Minwoo Im 



Re: [PATCH 12/16] hw/block/nvme: refactor NvmeRequest clearing

2020-07-29 Thread Minwoo Im
On 20-07-20 13:37:44, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Move clearing of the structure from "clear before use" to "clear
> after use".

Yah, agree on this.

Reviewed-by: Minwoo Im 



Re: [PATCH 15/16] hw/block/nvme: remove NvmeCmd parameter

2020-07-29 Thread Minwoo Im
On 20-07-20 13:37:47, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Keep a copy of the raw nvme command in the NvmeRequest and remove the
> now redundant NvmeCmd parameter.
> 
> Signed-off-by: Klaus Jensen 

I would really have suggested this change from 13th patch!

Reviewed-by: Minwoo Im 

Thanks,



Re: [PATCH 16/16] hw/block/nvme: use preallocated qsg/iov in nvme_dma_prp

2020-07-29 Thread Minwoo Im
On 20-07-20 13:37:48, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Since clean up of the request qsg/iov is now always done post-use, there
> is no need to use a stack-allocated qsg/iov in nvme_dma_prp.
> 
> Signed-off-by: Klaus Jensen 
> Acked-by: Keith Busch 
> Reviewed-by: Maxim Levitsky 

> ---
>  hw/block/nvme.c | 18 ++
>  1 file changed, 6 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 0b3dceccc89b..b6da5a9f3fc6 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -381,45 +381,39 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, 
> uint32_t len,
>   uint64_t prp1, uint64_t prp2, DMADirection dir,
>   NvmeRequest *req)
>  {
> -QEMUSGList qsg;
> -QEMUIOVector iov;
>  uint16_t status = NVME_SUCCESS;
>  
> -status = nvme_map_prp(n, &qsg, &iov, prp1, prp2, len, req);
> +status = nvme_map_prp(n, &req->qsg, &req->iov, prp1, prp2, len, req);

After this change, can we make nvme_map_prp() just receive
NvmeRequest *req without >qsg, >iov by retrieve them from
inside of the nvme_map_prp()?



Re: [PATCH 03/16] hw/block/nvme: replace dma_acct with blk_acct equivalent

2020-07-29 Thread Minwoo Im
Klaus,

On 20-07-20 13:37:35, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> The QSG isn't always initialized, so accounting could be wrong. Issue a
> call to blk_acct_start instead with the size taken from the QSG or IOV
> depending on the kind of I/O.
> 
> Signed-off-by: Klaus Jensen 
> Reviewed-by: Maxim Levitsky 
> ---
>  hw/block/nvme.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 9b1a080cdc70..cb236d1c8c46 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -620,9 +620,10 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
> NvmeCmd *cmd,
>  return NVME_INVALID_FIELD | NVME_DNR;
>  }
>  
> -dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct);
>  if (req->qsg.nsg > 0) {
>  req->has_sg = true;
> +block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 
> req->qsg.size,
> + acct);
>  req->aiocb = is_write ?
>  dma_blk_write(n->conf.blk, >qsg, data_offset, 
> BDRV_SECTOR_SIZE,
>nvme_rw_cb, req) :
> @@ -630,6 +631,8 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, 
> NvmeCmd *cmd,
>   nvme_rw_cb, req);
>  } else {
>  req->has_sg = false;
> +block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 
> req->iov.size,
> + acct);
>      req->aiocb = is_write ?
>  blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, 
> nvme_rw_cb,
>  req) :

Reviewed-by: Minwoo Im 

Thanks,



Re: [PATCH 02/16] hw/block/nvme: add mapping helpers

2020-07-29 Thread Minwoo Im
Hi Klaus,

On 20-07-20 13:37:34, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add nvme_map_addr, nvme_map_addr_cmb and nvme_addr_to_cmb helpers and
> use them in nvme_map_prp.
> 
> This fixes a bug where in the case of a CMB transfer, the device would
> map to the buffer with a wrong length.
> 
> Fixes: b2b2b67a00574 ("nvme: Add support for Read Data and Write Data in 
> CMBs.")
> Signed-off-by: Klaus Jensen 

>  static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t 
> prp1,
>   uint64_t prp2, uint32_t len, NvmeCtrl *n)
>  {
>  hwaddr trans_len = n->page_size - (prp1 % n->page_size);
>  trans_len = MIN(len, trans_len);
>  int num_prps = (len >> n->page_bits) + 1;
> +uint16_t status;
>  
>  if (unlikely(!prp1)) {
>  trace_pci_nvme_err_invalid_prp();
>  return NVME_INVALID_FIELD | NVME_DNR;
> -} else if (n->bar.cmbsz && prp1 >= n->ctrl_mem.addr &&
> -   prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
> -qsg->nsg = 0;
> +}
> +
> +if (nvme_addr_is_cmb(n, prp1)) {
>  qemu_iovec_init(iov, num_prps);
> -qemu_iovec_add(iov, (void *)>cmbuf[prp1 - n->ctrl_mem.addr], 
> trans_len);
>  } else {
>  pci_dma_sglist_init(qsg, >parent_obj, num_prps);
> -qemu_sglist_add(qsg, prp1, trans_len);
>  }
> +
> +status = nvme_map_addr(n, qsg, iov, prp1, trans_len);
> +if (status) {
> +goto unmap;
> +}
> +
>  len -= trans_len;
>  if (len) {
>  if (unlikely(!prp2)) {
>  trace_pci_nvme_err_invalid_prp2_missing();
> +status = NVME_INVALID_FIELD | NVME_DNR;
>  goto unmap;
>  }
>  if (len > n->page_size) {
> @@ -242,6 +309,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, 
> QEMUIOVector *iov, uint64_t prp1,
>  if (i == n->max_prp_ents - 1 && len > n->page_size) {
>  if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
>  trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
> +status = NVME_INVALID_FIELD | NVME_DNR;
>  goto unmap;
>  }
>  
> @@ -255,14 +323,14 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, 
> QEMUIOVector *iov, uint64_t prp1,
>  
>  if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
>  trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
> +status = NVME_INVALID_FIELD | NVME_DNR;
>  goto unmap;
>  }
>  
>  trans_len = MIN(len, n->page_size);
> -if (qsg->nsg){
> -qemu_sglist_add(qsg, prp_ent, trans_len);
> -} else {
> -qemu_iovec_add(iov, (void *)>cmbuf[prp_ent - 
> n->ctrl_mem.addr], trans_len);
> +status = nvme_map_addr(n, qsg, iov, prp_ent, trans_len);
> +if (status) {
> +goto unmap;
>  }
>  len -= trans_len;
>  i++;
> @@ -270,20 +338,27 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, 
> QEMUIOVector *iov, uint64_t prp1,
>  } else {
>  if (unlikely(prp2 & (n->page_size - 1))) {
>  trace_pci_nvme_err_invalid_prp2_align(prp2);
> +status = NVME_INVALID_FIELD | NVME_DNR;
>  goto unmap;
>  }
> -        if (qsg->nsg) {
> -qemu_sglist_add(qsg, prp2, len);
> -} else {
> -qemu_iovec_add(iov, (void *)>cmbuf[prp2 - 
> n->ctrl_mem.addr], trans_len);
> +status = nvme_map_addr(n, qsg, iov, prp2, len);
> +if (status) {
> +goto unmap;

I like these parts, which make the code much easier to read.

Reviewed-by: Minwoo Im 

Thanks,



Re: [PATCH 04/16] hw/block/nvme: remove redundant has_sg member

2020-07-29 Thread Minwoo Im
Klaus,

On 20-07-20 13:37:36, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Remove the has_sg member from NvmeRequest since it's redundant.
> 
> Also, make sure the request iov is destroyed at completion time.
> 
> Signed-off-by: Klaus Jensen 
> Reviewed-by: Maxim Levitsky 
> ---
>  hw/block/nvme.c | 11 ++-
>  hw/block/nvme.h |  1 -
>  2 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index cb236d1c8c46..6a1a1626b87b 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -548,16 +548,20 @@ static void nvme_rw_cb(void *opaque, int ret)
>  block_acct_failed(blk_get_stats(n->conf.blk), >acct);
>  req->status = NVME_INTERNAL_DEV_ERROR;
>  }
> -if (req->has_sg) {
> +
> +if (req->qsg.nalloc) {

Personally, I prefer a has_xxx or is_xxx inline helper to check whether
the request is based on the sg or the iov, but 'nalloc' is also fine
for figuring out the purpose here.

>  qemu_sglist_destroy(>qsg);
>  }
> +if (req->iov.nalloc) {
> +qemu_iovec_destroy(>iov);
> +}
> +

Maybe this can be in a separate commit?

Otherwise, It looks good to me.

Thanks,



Re: [PATCH 05/16] hw/block/nvme: refactor dma read/write

2020-07-29 Thread Minwoo Im
Klaus,

On 20-07-20 13:37:37, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Refactor the nvme_dma_{read,write}_prp functions into a common function
> taking a DMADirection parameter.
> 
> Signed-off-by: Klaus Jensen 
> Reviewed-by: Maxim Levitsky 

Reviewed-by: Minwoo Im 

Thanks,



Re: [PATCH 06/16] hw/block/nvme: pass request along for tracing

2020-07-29 Thread Minwoo Im
Klaus,

On 20-07-20 13:37:38, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Pass along the NvmeRequest in various functions since it is very useful
> for tracing.

One doubt here.
  This patch passes an NvmeRequest argument to nvme_map_prp() to trace
  the request's command id.  But could we just trace the cid before this
  kind of prp mapping, somewhere at the nvme_process_sq() level?  Then we
  could still figure out which request a given prp-mapping trace belongs to.

Tracing the cid is definitely great, but it feels like too much cost to
pass an argument just to trace 'cid' in the middle of the dma mapping stage.

Thanks,



Re: [PATCH 08/16] hw/block/nvme: verify validity of prp lists in the cmb

2020-07-29 Thread Minwoo Im
On 20-07-20 13:37:40, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Before this patch the device already supported PRP lists in the CMB, but
> it did not check for the validity of it nor announced the support in the
> Identify Controller data structure LISTS field.
> 
> If some of the PRPs in a PRP list are in the CMB, then ALL entries must
> be there. This patch makes sure that requirement is verified as well as
> properly announcing support for PRP lists in the CMB.
> 
> Signed-off-by: Klaus Jensen 
> Reviewed-by: Maxim Levitsky 

Reviewed-by: Minwoo Im 



Re: [PATCH 09/16] hw/block/nvme: refactor request bounds checking

2020-07-29 Thread Minwoo Im
On 20-07-20 13:37:41, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Hoist bounds checking into its own function and check for wrap-around.
> 
> Signed-off-by: Klaus Jensen 
> Reviewed-by: Maxim Levitsky 

Reviewed-by: Minwoo Im 



Re: [PATCH 07/16] hw/block/nvme: add request mapping helper

2020-07-29 Thread Minwoo Im
Klaus,

On 20-07-20 13:37:39, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Introduce the nvme_map helper to remove some noise in the main nvme_rw
> function.
> 
> Signed-off-by: Klaus Jensen 
> Reviewed-by: Maxim Levitsky 
> ---
>  hw/block/nvme.c | 13 ++---
>  1 file changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index f1e04608804b..68c33a11c144 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -413,6 +413,15 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, 
> uint32_t len,
>  return status;
>  }
>  
> +static uint16_t nvme_map(NvmeCtrl *n, NvmeCmd *cmd, size_t len,
> + NvmeRequest *req)

Can we specify what is going to be mapped in this function? like
nvme_map_dptr?

Thanks,



Re: [PATCH 1/2] nvme: updated shared header for copy command

2020-11-25 Thread Minwoo Im
Hello,

On 20-11-24 08:14:17, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add new data structures and types for the Simple Copy command.
> 
> Signed-off-by: Klaus Jensen 
> Cc: Stefan Hajnoczi 
> Cc: Fam Zheng 
> ---
>  include/block/nvme.h | 45 ++--
>  1 file changed, 43 insertions(+), 2 deletions(-)
> 
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index e95ff6ca9b37..b56efd6a89af 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -472,6 +472,7 @@ enum NvmeIoCommands {
>  NVME_CMD_COMPARE= 0x05,
>  NVME_CMD_WRITE_ZEROES   = 0x08,
>  NVME_CMD_DSM= 0x09,
> +NVME_CMD_COPY   = 0x19,
>  };
>  
>  typedef struct QEMU_PACKED NvmeDeleteQ {
> @@ -603,6 +604,35 @@ typedef struct QEMU_PACKED NvmeDsmRange {
>  uint64_tslba;
>  } NvmeDsmRange;
>  
> +enum {
> +NVME_COPY_FORMAT_0 = 0x0,
> +};
> +
> +typedef struct NvmeCopyCmd {
> +uint8_t opcode;
> +uint8_t flags;
> +uint16_tcid;
> +uint32_tnsid;
> +uint32_trsvd2[4];
> +NvmeCmdDptr dptr;
> +uint64_tsdlba;
> +uint32_tcdw12;
> +uint32_tcdw13;
> +uint32_tilbrt;
> +uint16_tlbat;
> +uint16_tlbatm;
> +} NvmeCopyCmd;
> +
> +typedef struct NvmeCopySourceRange {
> +uint8_t  rsvd0[8];
> +uint64_t slba;
> +uint16_t nlb;
> +uint8_t  rsvd18[6];
> +uint32_t eilbrt;
> +uint16_t elbatm;
> +uint16_t elbat;
> +} NvmeCopySourceRange;
> +
>  enum NvmeAsyncEventRequest {
>  NVME_AER_TYPE_ERROR = 0,
>  NVME_AER_TYPE_SMART = 1,
> @@ -680,6 +710,7 @@ enum NvmeStatusCodes {
>  NVME_CONFLICTING_ATTRS  = 0x0180,
>  NVME_INVALID_PROT_INFO  = 0x0181,
>  NVME_WRITE_TO_RO= 0x0182,
> +NVME_CMD_SIZE_LIMIT = 0x0183,
>  NVME_WRITE_FAULT= 0x0280,
>  NVME_UNRECOVERED_READ   = 0x0281,
>  NVME_E2E_GUARD_ERROR= 0x0282,
> @@ -831,7 +862,7 @@ typedef struct QEMU_PACKED NvmeIdCtrl {
>  uint8_t nvscc;
>  uint8_t rsvd531;
>  uint16_tacwu;
> -uint8_t rsvd534[2];
> +uint16_tocfs;
>  uint32_tsgls;
>  uint8_t rsvd540[228];
>  uint8_t subnqn[256];
> @@ -854,6 +885,11 @@ enum NvmeIdCtrlOncs {
>  NVME_ONCS_FEATURES  = 1 << 4,
>  NVME_ONCS_RESRVATIONS   = 1 << 5,
>  NVME_ONCS_TIMESTAMP = 1 << 6,
> +NVME_ONCS_COPY  = 1 << 8,
> +};
> +
> +enum NvmeIdCtrlOcfs {
> +NVME_OCFS_COPY_FORMAT_0 = 1 << NVME_COPY_FORMAT_0,

I'd prefer (1 << 0) to (1 << enum) which is more obvious and same style
with enum NvmeIdCtrlOncs.

But I'm fine with both cases.

Please add:

Reviewed-by: Minwoo Im 



Re: [PATCH] hw/block/nvme: add compare command

2020-11-25 Thread Minwoo Im
Hello,

On 20-11-24 08:37:14, Klaus Jensen wrote:
> From: Gollu Appalanaidu 
> 
> Add the Compare command.
> 
> This implementation uses a bounce buffer to read in the data from
> storage and then compare with the host supplied buffer.
> 
> Signed-off-by: Gollu Appalanaidu 
> [k.jensen: rebased]
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c   | 100 +-
>  hw/block/trace-events |   2 +
>  2 files changed, 101 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index f7f888402b06..f88710ca3948 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -999,6 +999,50 @@ static void nvme_aio_discard_cb(void *opaque, int ret)
>  nvme_enqueue_req_completion(nvme_cq(req), req);
>  }
>  
> +struct nvme_compare_ctx {
> +QEMUIOVector iov;
> +uint8_t *bounce;
> +size_t len;
> +};
> +
> +static void nvme_compare_cb(void *opaque, int ret)
> +{
> +NvmeRequest *req = opaque;
> +NvmeNamespace *ns = req->ns;
> +struct nvme_compare_ctx *ctx = req->opaque;
> +g_autofree uint8_t *buf = NULL;

nit-picking here: unnecessary initialization to NULL.

> +uint16_t status;
> +
> +trace_pci_nvme_compare_cb(nvme_cid(req));
> +
> +if (!ret) {
> +block_acct_done(blk_get_stats(ns->blkconf.blk), >acct);
> +} else {
> +block_acct_failed(blk_get_stats(ns->blkconf.blk), >acct);
> +nvme_aio_err(req, ret);
> +goto out;
> +}
> +
> +buf = g_malloc(ctx->len);
> +
> +status = nvme_dma(nvme_ctrl(req), buf, ctx->len, DMA_DIRECTION_TO_DEVICE,
> +  req);
> +if (status) {
> +goto out;
> +}

Don't we need to assign the status value to req->status in case
(status != 0)?  If we don't assign it to req->status, the command will
complete with a success status code even if it fails during nvme_dma().



Re: [PATCH] hw/block/nvme: fix bad clearing of CAP

2020-12-09 Thread Minwoo Im
Hello,

Reviewed-by: Minwoo Im 



Re: [PATCH v2] hw/block/nvme: add compare command

2020-11-26 Thread Minwoo Im
Hello,

On Fri, Nov 27, 2020 at 3:56 AM Klaus Jensen  wrote:
>
> From: Gollu Appalanaidu 
>
> Add the Compare command.
>
> This implementation uses a bounce buffer to read in the data from
> storage and then compare with the host supplied buffer.
>
> Signed-off-by: Gollu Appalanaidu 
> [k.jensen: rebased]
> Signed-off-by: Klaus Jensen 


Reviewed-by: Minwoo Im 



Re: [PATCH v8 1/5] hw/block/nvme: remove superfluous NvmeCtrl parameter

2020-11-16 Thread Minwoo Im
On 11/12 20:59, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> nvme_check_bounds has no use of the NvmeCtrl parameter; remove it.
> 
> Signed-off-by: Klaus Jensen 

Reviewed-by: Minwoo Im 



Re: [PATCH v8 2/5] hw/block/nvme: pull aio error handling

2020-11-16 Thread Minwoo Im
On 11/12 20:59, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add a new function, nvme_aio_err, to handle errors resulting from AIOs
> and use this from the callbacks.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 61 +
>  1 file changed, 36 insertions(+), 25 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 51f35e8cf18b..a96a996ddc0d 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -878,6 +878,41 @@ static inline uint16_t nvme_check_bounds(NvmeNamespace 
> *ns, uint64_t slba,
>  return NVME_SUCCESS;
>  }
>  
> +static void nvme_aio_err(NvmeRequest *req, int ret)
> +{
> +uint16_t status = NVME_SUCCESS;
> +Error *local_err = NULL;
> +
> +switch (req->cmd.opcode) {
> +case NVME_CMD_READ:
> +status = NVME_UNRECOVERED_READ;
> +break;
> +case NVME_CMD_FLUSH:
> +case NVME_CMD_WRITE:
> +case NVME_CMD_WRITE_ZEROES:
> +status = NVME_WRITE_FAULT;
> +break;
> +default:
> +status = NVME_INTERNAL_DEV_ERROR;
> +break;
> +}
> +
> +trace_pci_nvme_err_aio(nvme_cid(req), strerror(ret), status);
> +
> +error_setg_errno(_err, -ret, "aio failed");
> +error_report_err(local_err);
> +
> +/*
> + * Set the command status code to the first encountered error but allow a
> + * subsequent Internal Device Error to trump it.
> + */
> +if (req->status && status != NVME_INTERNAL_DEV_ERROR) {
> +return;
> +}

Are we okay with the trace print above when this 'if' branch is taken?
I guess if this branch is taken, the trace print may not matter that much.



Re: [PATCH v8 3/5] hw/block/nvme: add dulbe support

2020-11-16 Thread Minwoo Im
On 11/12 20:59, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Add support for reporting the Deallocated or Unwritten Logical Block
> Error (DULBE).
> 
> Rely on the block status flags reported by the block layer and consider
> any block with the BDRV_BLOCK_ZERO flag to be deallocated.
> 
> Multiple factors affect when a Write Zeroes command result in
> deallocation of blocks.
> 
>   * the underlying file system block size
>   * the blockdev format
>   * the 'discard' and 'logical_block_size' parameters
> 
>  format | discard | wz (512B)  wz (4KiB)  wz (64KiB)
> -
>   qcow2ignore   n  n  y
>   qcow2unmapn  n  y
>   raw  ignore   n  y  y
>   raw  unmapn  y  y
> 
> So, this works best with an image in raw format and 4KiB LBAs, since
> holes can then be punched on a per-block basis (this assumes a file
> system with a 4kb block size, YMMV). A qcow2 image, uses a cluster size
> of 64KiB by default and blocks will only be marked deallocated if a full
> cluster is zeroed or discarded. However, this *is* consistent with the
> spec since Write Zeroes "should" deallocate the block if the Deallocate
> attribute is set and "may" deallocate if the Deallocate attribute is not
> set. Thus, we always try to deallocate (the BDRV_REQ_MAY_UNMAP flag is
> always set).
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme-ns.h|  4 ++
>  include/block/nvme.h  |  5 +++
>  hw/block/nvme-ns.c|  8 ++--
>  hw/block/nvme.c   | 91 ++-
>  hw/block/trace-events |  4 ++
>  5 files changed, 107 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
> index 83734f4606e1..44bf6271b744 100644
> --- a/hw/block/nvme-ns.h
> +++ b/hw/block/nvme-ns.h
> @@ -31,6 +31,10 @@ typedef struct NvmeNamespace {
>  NvmeIdNs id_ns;
>  
>  NvmeNamespaceParams params;
> +
> +struct {
> +uint32_t err_rec;
> +} features;
>  } NvmeNamespace;
>  
>  static inline uint32_t nvme_nsid(NvmeNamespace *ns)
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 8a46d9cf015f..966c3bb304bd 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -687,6 +687,7 @@ enum NvmeStatusCodes {
>  NVME_E2E_REF_ERROR  = 0x0284,
>  NVME_CMP_FAILURE= 0x0285,
>  NVME_ACCESS_DENIED  = 0x0286,
> +NVME_DULB   = 0x0287,
>  NVME_MORE   = 0x2000,
>  NVME_DNR= 0x4000,
>  NVME_NO_COMPLETE= 0xffff,
> @@ -903,6 +904,9 @@ enum NvmeIdCtrlLpa {
>  #define NVME_AEC_NS_ATTR(aec)   ((aec >> 8) & 0x1)
>  #define NVME_AEC_FW_ACTIVATION(aec) ((aec >> 9) & 0x1)
>  
> +#define NVME_ERR_REC_TLER(err_rec)  (err_rec & 0xffff)
> +#define NVME_ERR_REC_DULBE(err_rec) (err_rec & 0x1)
> +
>  enum NvmeFeatureIds {
>  NVME_ARBITRATION= 0x1,
>  NVME_POWER_MANAGEMENT   = 0x2,
> @@ -1023,6 +1027,7 @@ enum NvmeNsIdentifierType {
>  
>  
>  #define NVME_ID_NS_NSFEAT_THIN(nsfeat)  ((nsfeat & 0x1))
> +#define NVME_ID_NS_NSFEAT_DULBE(nsfeat) ((nsfeat >> 2) & 0x1)
>  #define NVME_ID_NS_FLBAS_EXTENDED(flbas)((flbas >> 4) & 0x1)
>  #define NVME_ID_NS_FLBAS_INDEX(flbas)   ((flbas & 0xf))
>  #define NVME_ID_NS_MC_SEPARATE(mc)  ((mc >> 1) & 0x1)
> diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
> index 31c80cdf5b5f..f1cc734c60f5 100644
> --- a/hw/block/nvme-ns.c
> +++ b/hw/block/nvme-ns.c
> @@ -33,9 +33,7 @@ static void nvme_ns_init(NvmeNamespace *ns)
>  NvmeIdNs *id_ns = >id_ns;
>  int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
>  
> -if (blk_get_flags(ns->blkconf.blk) & BDRV_O_UNMAP) {
> -ns->id_ns.dlfeat = 0x9;
> -}
> +ns->id_ns.dlfeat = 0x9;
>  
>  id_ns->lbaf[lba_index].ds = 31 - clz32(ns->blkconf.logical_block_size);
>  
> @@ -44,6 +42,9 @@ static void nvme_ns_init(NvmeNamespace *ns)
>  /* no thin provisioning */
>  id_ns->ncap = id_ns->nsze;
>  id_ns->nuse = id_ns->ncap;
> +
> +/* support DULBE */
> +id_ns->nsfeat |= 0x4;
>  }
>  
>  static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
> @@ -92,6 +93,7 @@ int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error 
> **errp)
>  }
>  
>  nvme_ns_init(ns);
> +
>  if (nvme_register_namespace(n, ns, errp)) {
>  return -1;
>  }
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index a96a996ddc0d..8d75a89fd872 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -105,6 +105,7 @@ static const bool nvme_feature_support[NVME_FID_MAX] = {
>  
>  static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
>  [NVME_TEMPERATURE_THRESHOLD]= NVME_FEAT_CAP_CHANGE,
> +[NVME_ERROR_RECOVERY]   = NVME_FEAT_CAP_CHANGE | 
> NVME_FEAT_CAP_NS,
>  [NVME_VOLATILE_WRITE_CACHE] = 

Re: [PATCH v8 4/5] nvme: add namespace I/O optimization fields to shared header

2020-11-16 Thread Minwoo Im
On 11/12 20:59, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> This adds the NPWG, NPWA, NPDG, NPDA and NOWS family of fields to the
> shared nvme.h header for use by later patches.
> 
> Signed-off-by: Klaus Jensen 
> Cc: Stefan Hajnoczi 
> Cc: Fam Zheng 
> Reviewed-by: Stefan Hajnoczi 

Reviewed-by: Minwoo Im 



Re: [PATCH v3 10/12] hw/block/nvme: move cmb logic to v1.4

2021-01-19 Thread Minwoo Im
On 21-01-19 11:15:02, Klaus Jensen wrote:
> From: Padmakar Kalghatgi 
> 
> Implement v1.4 logic for configuring the Controller Memory Buffer. This
> is not backward compatible with v1.3, so drivers that only support v1.3
> will not be able to use the CMB anymore.

Reviewed the legacy-cmb parameter, but we should update the commit
description above since we can also support v1.3 behavior, or you can
update it when you pick this up :)



Re: [PATCH v3 07/12] hw/block/nvme: remove redundant zeroing of PMR registers

2021-01-20 Thread Minwoo Im
Reviewed-by: Minwoo Im 



Re: [RFC PATCH V3 8/8] hw/block/nvme: Add Identify Active Namespace ID List

2021-01-20 Thread Minwoo Im
> Hello Minwoo,
> 
> By introducing a detached parameter,
> you are also implicitly making the following
> NVMe commands no longer be spec compliant:
> 
> NVME_ID_CNS_NS, NVME_ID_CNS_CS_NS,
> NVME_ID_CNS_NS_ACTIVE_LIST, NVME_ID_CNS_CS_NS_ACTIVE_LIST
> 
> When these commands are called on a detached namespace,
> they should usually return a zero filled data struct.

Agreed.

> Dmitry and I had a patch on V8 on the ZNS series
> that tried to fix some the existing NVMe commands
> to be spec compliant, by handling detached namespaces
> properly. In the end, in order to make it easier to
> get the ZNS series accepted, we decided to drop the
> detached related stuff from the series.
> 
> Feel free to look at that patch for inspiration:
> https://github.com/dmitry-fomichev/qemu/commit/251c0ffee5149c739b1347811fa7e32a1c36bf7c

I've seen this patch, and as Klaus said, the only thing these patches
need is to use an nvme_ns_is_attached() helper in the Identify
handlers.

> I'm not sure if you want to modify all the functions that
> our patch modifies, but I think that you should at least
> modify the following nvme functions:
> 
> nvme_identify_ns()
> nvme_identify_ns_csi()
> nvme_identify_nslist()
> nvme_identify_nslist_csi()

Yes, pretty makes sense to update them.  But now it seems like
'attach/detach' scheme should have been separated out of this series
which just introduced the multi-path for controllers and namespace
sharing.  I will drop this 'detach' scheme out of this series and make
another series to support all of the Identify you mentioned above
cleanly.

> So they handle detached namespaces correctly for both:
> NVME_ID_CNS_NS, NVME_ID_CNS_CS_NS,
> NVME_ID_CNS_NS_ACTIVE_LIST, NVME_ID_CNS_CS_NS_ACTIVE_LIST
> as well as for:
> NVME_ID_CNS_NS_PRESENT, NVME_ID_CNS_CS_NS_PRESENT,
> NVME_ID_CNS_NS_PRESENT_LIST, NVME_ID_CNS_CS_NS_PRESENT_LIST
> 
> 
> Kind regards,
> Niklas



Re: [PATCH v3 12/12] hw/block/nvme: lift cmb restrictions

2021-01-20 Thread Minwoo Im
Nice for codes much more clean.

Reviewed-by: Minwoo Im 



Re: [PATCH v3 10/12] hw/block/nvme: move cmb logic to v1.4

2021-01-20 Thread Minwoo Im
Reviewed-by: Minwoo Im 



Re: [PATCH V5 4/6] hw/block/nvme: support for multi-controller in subsystem

2021-01-22 Thread Minwoo Im
On 21-01-22 10:42:36, Keith Busch wrote:
> On Fri, Jan 22, 2021 at 09:07:34PM +0900, Minwoo Im wrote:
> > index b525fca14103..3dedefb8ebba 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -4435,6 +4435,9 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
> > *pci_dev)
> >  strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
> >  strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
> >  strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
> > +
> > +id->cntlid = n->cntlid;
> 
> cpu_to_le16()? It might be okay to not do that since the only
> requirement is that this is a unique value, but it would be confusing
> for decoding commands that have a controller id field.

Agreed.

Yes, cntlids are allocated as unique values, so functionality is not a
problem here.  But even so, we should give it the proper value in the
Identify data structure, following its endianness policy, to avoid confusion.

Thanks Keith! will fix it :)



[PATCH] hw/block/nvme: fix wrong parameter name 'cross_read'

2021-01-25 Thread Minwoo Im
The actual parameter name is 'cross_read' rather than 'cross_zone_read'.

Signed-off-by: Minwoo Im 
---
 hw/block/nvme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 21aec90637fa..bf9134f73d81 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -81,7 +81,7 @@
  * The default value means there is no limit to the number of
  * concurrently open zones.
  *
- * zoned.cross_zone_read=
+ * zoned.cross_read=
  * Setting this property to true enables Read Across Zone Boundaries.
  */
 
-- 
2.17.1




Re: [PATCH V6 4/6] hw/block/nvme: support for multi-controller in subsystem

2021-01-25 Thread Minwoo Im
On 21-01-25 10:11:43, Keith Busch wrote:
> On Mon, Jan 25, 2021 at 07:03:32PM +0100, Klaus Jensen wrote:
> > On Jan 24 11:54, Minwoo Im wrote:
> > > We have nvme-subsys and nvme devices mapped together.  To support
> > > multi-controller scheme to this setup, controller identifier(id) has to
> > > be managed.  Earlier, cntlid(controller id) used to be always 0 because
> > > we didn't have any subsystem scheme that controller id matters.
> > > 
> > > This patch introduced 'cntlid' attribute to the nvme controller
> > > instance(NvmeCtrl) and make it allocated by the nvme-subsys device
> > > mapped to the controller.  If nvme-subsys is not given to the
> > > controller, then it will always be 0 as it was.
> > > 
> > > Added 'ctrls' array in the nvme-subsys instance to manage attached
> > > controllers to the subsystem with a limit(32).  This patch didn't take
> > > list for the controllers to make it seamless with nvme-ns device.
> > > 
> > > Signed-off-by: Minwoo Im 
> > > ---
> > >  hw/block/nvme-subsys.c | 21 +
> > >  hw/block/nvme-subsys.h |  4 
> > >  hw/block/nvme.c| 29 +
> > >  hw/block/nvme.h|  1 +
> > >  4 files changed, 55 insertions(+)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index b525fca14103..7138389be4bd 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -4481,6 +4484,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
> > > *pci_dev)
> > >  id->psd[0].enlat = cpu_to_le32(0x10);
> > >  id->psd[0].exlat = cpu_to_le32(0x4);
> > >  
> > > +if (n->subsys) {
> > > +id->cmic |= NVME_CMIC_MULTI_CTRL;
> > > +}
> > 
> > Since multiple controllers show up with a PCIe port of their own, do we
> > need to set bit 0 (NVME_CMIC_MULTI_PORT?) as well? Or am I
> > misunderstanding that bit?
> 
> AIUI, if you report this MULTI_PORT bit, then each PCI device in the
> subsystem needs to report a different "Port Number" in their PCIe Link
> Capabilities register. I don't think we can manipulate that value from
> the nvme "device", but I also don't know what a host should do with this
> information even if we could. So, I think it's safe to leave it at 0.

AFAIK, if we leave it at 0, the kernel will not allocate a disk for the
multi-path case (e.g., nvmeXcYnZ).



Re: [PATCH V6 0/6] hw/block/nvme: support multi-path for ctrl/ns

2021-01-25 Thread Minwoo Im
On 21-01-25 21:29:58, Klaus Jensen wrote:
> On Jan 24 11:54, Minwoo Im wrote:
> > Hello,
> > 
> > This is sixth patch series for the support of NVMe subsystem scheme with
> > multi-controller and namespace sharing in a subsystem.
> > 
> > This version has a fix in nvme_init_ctrl() when 'cntlid' is set to the
> > Identify Controller data structure by making it by cpu_to_le16() as
> > Keith reviewed.
> > 
> > Here's test result with a simple 'nvme list -v' command from this model:
> > 
> >   -device nvme-subsys,id=subsys0 \
> >   -device nvme,serial=foo,id=nvme0,subsys=subsys0 \
> >   -device nvme,serial=bar,id=nvme1,subsys=subsys0 \
> >   -device nvme,serial=baz,id=nvme2,subsys=subsys0 \
> >   -device nvme-ns,id=ns1,drive=drv10,nsid=1,subsys=subsys0 \
> >   -device nvme-ns,id=ns2,drive=drv11,nsid=2,bus=nvme2 \
> >   \
> >   -device nvme,serial=qux,id=nvme3 \
> >   -device nvme-ns,id=ns3,drive=drv12,nsid=3,bus=nvme3 \
> >   \
> >   -device nvme-subsys,id=subsys1 \
> >   -device nvme,serial=quux,id=nvme4,subsys=subsys1 \
> >   -device nvme-ns,id=ns4,drive=drv13,nsid=1,subsys=subsys1,zoned=true \
> > 
> >   root@vm:~/work# nvme list -v
> >   NVM Express Subsystems
> > 
> >   SubsystemSubsystem-NQN
> > Controllers
> >    
> > 
> >  
> >   nvme-subsys1 nqn.2019-08.org.qemu:subsys0 
> > nvme0, nvme1, nvme2
> >   nvme-subsys3 nqn.2019-08.org.qemu:qux 
> > nvme3
> >   nvme-subsys4 nqn.2019-08.org.qemu:subsys1 
> > nvme4
> > 
> >   NVM Express Controllers
> > 
> >   Device   SN   MN   FR 
> >   TxPort AddressSubsystemNamespaces
> >      
> >  -- --  
> >   nvme0foo  QEMU NVMe Ctrl   
> > 1.0  pcie   :00:06.0   nvme-subsys1 nvme1c0n1
> >   nvme1bar  QEMU NVMe Ctrl   
> > 1.0  pcie   :00:07.0   nvme-subsys1 nvme1c1n1
> >   nvme2baz  QEMU NVMe Ctrl   
> > 1.0  pcie   :00:08.0   nvme-subsys1 nvme1c2n1, nvme1c2n2
> >   nvme3qux  QEMU NVMe Ctrl   
> > 1.0  pcie   :00:09.0   nvme-subsys3 nvme3n1
> >   nvme4quux QEMU NVMe Ctrl   
> > 1.0  pcie   :00:0a.0   nvme-subsys4 nvme4c4n1
> > 
> >   NVM Express Namespaces
> > 
> >   Device   NSID Usage  Format   
> > Controllers
> >     --  
> > 
> >   nvme1n1  1134.22  MB / 134.22  MB512   B +  0 B   nvme0, 
> > nvme1, nvme2
> >   nvme1n2  2268.44  MB / 268.44  MB512   B +  0 B   nvme2
> >   nvme3n1  3268.44  MB / 268.44  MB512   B +  0 B   nvme3
> >   nvme4n1  1268.44  MB / 268.44  MB512   B +  0 B   nvme4
> > 
> > Thanks,
> > 
> > Since V5:
> >   - Fix endianness for 'cntlid' in Identify Controller data structure.
> > (Keith)
> > 
> > Since V4:
> >   - Code clean-up to snprintf rather than duplicating it and copy.
> > (Keith)
> >   - Documentation for 'subsys' clean-up.  (Keith)
> >   - Remove 'cntlid' param from nvme_init_ctrl().  (Keith)
> >   - Put error_propagate() in nvme_realize().  (Keith)
> > 
> > Since RFC V3:
> >   - Exclude 'deatched' scheme from this series.  This will be covered in
> > the next series by covering all the ns-related admin commands
> > including ZNS and ns-mgmt. (Niklas)
> >   - Rebased on nvme-next.
> >   - Remove RFC tag from this V4.
> > 
> > Since RFC V2:
> >   - Rebased on nvme-next branch with trivial patches from the previous
> > version(V2) applied. (Klaus)
> >   - Fix enumeration type name convention with NvmeIdNs prefix. (Klaus)
> >   - Put 'cntlid' to NvmeCtrl instance in nvme_init_ctrl() which was
> > missed i

Re: [PATCH V4 1/6] hw/block/nvme: introduce nvme-subsys device

2021-01-21 Thread Minwoo Im
On 21-01-21 14:52:02, Keith Busch wrote:
> On Fri, Jan 22, 2021 at 07:09:03AM +0900, Minwoo Im wrote:
> > +static void nvme_subsys_setup(NvmeSubsystem *subsys)
> > +{
> > +char *subnqn;
> > +
> > +subnqn = g_strdup_printf("nqn.2019-08.org.qemu:%s", 
> > subsys->parent_obj.id);
> > +strpadcpy((char *)subsys->subnqn, sizeof(subsys->subnqn), subnqn, 
> > '\0');
> > +g_free(subnqn);
> 
> Instead of the duplication and copy, you could format the string
> directly into the destination:
> 
> snprintf(subsys->subnqn, sizeof(subsys->subnqn), 
> "nqn.2019-08.org.qemu:%s",
>  subsys->parent_obj.id);

Oh, Thanks. Will fix it!



Re: [PATCH V4 2/6] hw/block/nvme: support to map controller to a subsystem

2021-01-21 Thread Minwoo Im
On 21-01-21 15:03:38, Keith Busch wrote:
> On Fri, Jan 22, 2021 at 07:09:04AM +0900, Minwoo Im wrote:
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -23,6 +23,7 @@
> >   *  max_ioqpairs=, \
> >   *  aerl=, aer_max_queued=, \
> >   *  mdts=,zoned.append_size_limit= \
> > + *  ,subsys= \
> 
> For consistency, the ',' goes in the preceeding line.

I have no idea what happened here :(.  Will fix it. Thanks!



[PATCH V4 3/6] hw/block/nvme: add CMIC enum value for Identify Controller

2021-01-21 Thread Minwoo Im
Added Controller Multi-path I/O and Namespace Sharing Capabilities
(CMIC) field to support multi-controller in the following patches.

This field is in Identify Controller data structure in [76].

Signed-off-by: Minwoo Im 
---
 include/block/nvme.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index e4b918064df9..d6415a869c1c 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1034,6 +1034,10 @@ enum NvmeIdCtrlLpa {
 NVME_LPA_EXTENDED = 1 << 2,
 };
 
+enum NvmeIdCtrlCmic {
+NVME_CMIC_MULTI_CTRL= 1 << 1,
+};
+
 #define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf)
 #define NVME_CTRL_SQES_MAX(sqes) (((sqes) >> 4) & 0xf)
 #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf)
-- 
2.17.1




[PATCH V4 4/6] hw/block/nvme: support for multi-controller in subsystem

2021-01-21 Thread Minwoo Im
We have nvme-subsys and nvme devices mapped together.  To support
multi-controller scheme to this setup, controller identifier(id) has to
be managed.  Earlier, cntlid(controller id) used to be always 0 because
we didn't have any subsystem scheme that controller id matters.

This patch introduced 'cntlid' attribute to the nvme controller
instance(NvmeCtrl) and make it allocated by the nvme-subsys device
mapped to the controller.  If nvme-subsys is not given to the
controller, then it will always be 0 as it was.

Added 'ctrls' array in the nvme-subsys instance to manage attached
controllers to the subsystem with a limit(32).  This patch didn't take
list for the controllers to make it seamless with nvme-ns device.

Signed-off-by: Minwoo Im 
---
 hw/block/nvme-subsys.c | 21 +
 hw/block/nvme-subsys.h |  4 
 hw/block/nvme.c| 34 --
 hw/block/nvme.h|  1 +
 4 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c
index f1dc71d588d9..a01003136b12 100644
--- a/hw/block/nvme-subsys.c
+++ b/hw/block/nvme-subsys.c
@@ -22,6 +22,27 @@
 #include "nvme.h"
 #include "nvme-subsys.h"
 
+int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
+{
+NvmeSubsystem *subsys = n->subsys;
+int cntlid;
+
+for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) {
+if (!subsys->ctrls[cntlid]) {
+break;
+}
+}
+
+if (cntlid == ARRAY_SIZE(subsys->ctrls)) {
+error_setg(errp, "no more free controller id");
+return -1;
+}
+
+subsys->ctrls[cntlid] = n;
+
+return cntlid;
+}
+
 static void nvme_subsys_setup(NvmeSubsystem *subsys)
 {
 char *subnqn;
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
index 40f06a4c7db0..4eba50d96a1d 100644
--- a/hw/block/nvme-subsys.h
+++ b/hw/block/nvme-subsys.h
@@ -20,6 +20,10 @@ typedef struct NvmeNamespace NvmeNamespace;
 typedef struct NvmeSubsystem {
 DeviceState parent_obj;
 uint8_t subnqn[256];
+
+NvmeCtrl*ctrls[NVME_SUBSYS_MAX_CTRLS];
 } NvmeSubsystem;
 
+int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
+
 #endif /* NVME_SUBSYS_H */
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index ab0531492ddd..225f0d3f3a27 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -4427,16 +4427,21 @@ static void nvme_init_subnqn(NvmeCtrl *n)
 }
 }
 
-static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
+static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t cntlid)
 {
 NvmeIdCtrl *id = >id_ctrl;
 uint8_t *pci_conf = pci_dev->config;
 
+n->cntlid = cntlid;
+
 id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
 id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
 strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
 strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
 strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
+
+id->cntlid = cntlid;
+
 id->rab = 6;
 id->ieee[0] = 0x00;
 id->ieee[1] = 0x02;
@@ -4483,6 +4488,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->psd[0].enlat = cpu_to_le32(0x10);
 id->psd[0].exlat = cpu_to_le32(0x4);
 
+if (n->subsys) {
+id->cmic |= NVME_CMIC_MULTI_CTRL;
+}
+
 NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
 NVME_CAP_SET_CQR(n->bar.cap, 1);
 NVME_CAP_SET_TO(n->bar.cap, 0xf);
@@ -4497,11 +4506,28 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 n->bar.intmc = n->bar.intms = 0;
 }
 
+static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
+{
+int cntlid;
+
+if (!n->subsys) {
+return 0;
+}
+
+cntlid = nvme_subsys_register_ctrl(n, errp);
+if (cntlid < 0) {
+return -1;
+}
+
+return cntlid;
+}
+
 static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 {
 NvmeCtrl *n = NVME(pci_dev);
 NvmeNamespace *ns;
 Error *local_err = NULL;
+int cntlid;
 
 nvme_check_constraints(n, _err);
 if (local_err) {
@@ -4517,7 +4543,11 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
**errp)
 return;
 }
 
-nvme_init_ctrl(n, pci_dev);
+cntlid = nvme_init_subsys(n, errp);
+if (cntlid < 0) {
+return;
+}
+nvme_init_ctrl(n, pci_dev, cntlid);
 
 /* setup a namespace if the controller drive property was given */
 if (n->namespace.blkconf.blk) {
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 04d4684601fd..b8f5f2d6ffb8 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -134,6 +134,7 @@ typedef struct NvmeCtrl {
 NvmeBus  bus;
 BlockConfconf;
 
+uint16_tcntlid;
 boolqs_created;
 uint32_tpage_size;
 uint16_tpage_bits;
-- 
2.17.1




[PATCH V4 6/6] hw/block/nvme: support for shared namespace in subsystem

2021-01-21 Thread Minwoo Im
nvme-ns device is registered to a nvme controller device during the
initialization in nvme_register_namespace() in case that 'bus' property
is given which means it's mapped to a single controller.

This patch introduced a new property 'subsys' just like the controller
device instance did to map a namespace to a NVMe subsystem.

If 'subsys' property is given to the nvme-ns device, it will belong to
the specified subsystem and will be attached to all controllers in that
subsystem by enabling shared namespace capability in NMIC(Namespace
Multi-path I/O and Namespace Capabilities) in Identify Namespace.

Usage:

  -device nvme-subsys,id=subsys0
  -device nvme,serial=foo,id=nvme0,subsys=subsys0
  -device nvme,serial=bar,id=nvme1,subsys=subsys0
  -device nvme,serial=baz,id=nvme2,subsys=subsys0
  -device nvme-ns,id=ns1,drive=,nsid=1,subsys=subsys0  # Shared
  -device nvme-ns,id=ns2,drive=,nsid=2,bus=nvme2   # Non-shared

  In the above example, 'ns1' will be shared to 'nvme0' and 'nvme1' in
  the same subsystem.  On the other hand, 'ns2' will be attached to the
  'nvme2' only as a private namespace in that subsystem.

All the namespace with 'subsys' parameter will attach all controllers in
the subsystem to the namespace by default.

Signed-off-by: Minwoo Im 
---
 hw/block/nvme-ns.c | 23 ++-
 hw/block/nvme-ns.h |  7 +++
 hw/block/nvme-subsys.c | 25 +
 hw/block/nvme-subsys.h |  3 +++
 hw/block/nvme.c| 10 +-
 5 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 62b25cf69bfa..9b493f2ead03 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -63,6 +63,10 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
 
 id_ns->npda = id_ns->npdg = npdg - 1;
 
+if (nvme_ns_shared(ns)) {
+id_ns->nmic |= NVME_NMIC_NS_SHARED;
+}
+
 return 0;
 }
 
@@ -365,16 +369,25 @@ static void nvme_ns_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
-if (nvme_register_namespace(n, ns, errp)) {
-error_propagate_prepend(errp, local_err,
-"could not register namespace: ");
-return;
+if (ns->subsys) {
+if (nvme_subsys_register_ns(ns, errp)) {
+error_propagate_prepend(errp, local_err,
+"could not setup namespace to subsys: ");
+return;
+}
+} else {
+if (nvme_register_namespace(n, ns, errp)) {
+error_propagate_prepend(errp, local_err,
+"could not register namespace: ");
+return;
+}
 }
-
 }
 
 static Property nvme_ns_props[] = {
 DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
+DEFINE_PROP_LINK("subsys", NvmeNamespace, subsys, TYPE_NVME_SUBSYS,
+ NvmeSubsystem *),
 DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
 DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
 DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false),
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 293ac990e3f6..929e78861903 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -47,6 +47,8 @@ typedef struct NvmeNamespace {
 const uint32_t *iocs;
 uint8_t  csi;
 
+NvmeSubsystem   *subsys;
+
 NvmeIdNsZoned   *id_ns_zoned;
 NvmeZone*zone_array;
 QTAILQ_HEAD(, NvmeZone) exp_open_zones;
@@ -77,6 +79,11 @@ static inline uint32_t nvme_nsid(NvmeNamespace *ns)
 return -1;
 }
 
+static inline bool nvme_ns_shared(NvmeNamespace *ns)
+{
+return !!ns->subsys;
+}
+
 static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns)
 {
 NvmeIdNs *id_ns = >id_ns;
diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c
index a01003136b12..e7efdcae7d0d 100644
--- a/hw/block/nvme-subsys.c
+++ b/hw/block/nvme-subsys.c
@@ -43,6 +43,31 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
 return cntlid;
 }
 
+int nvme_subsys_register_ns(NvmeNamespace *ns, Error **errp)
+{
+NvmeSubsystem *subsys = ns->subsys;
+NvmeCtrl *n;
+int i;
+
+if (subsys->namespaces[nvme_nsid(ns)]) {
+error_setg(errp, "namespace %d already registerd to subsy %s",
+   nvme_nsid(ns), subsys->parent_obj.id);
+return -1;
+}
+
+subsys->namespaces[nvme_nsid(ns)] = ns;
+
+for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
+n = subsys->ctrls[i];
+
+if (n && nvme_register_namespace(n, ns, errp)) {
+return -1;
+}
+}
+
+return 0;
+}
+
 static void nvme_subsys_setup(NvmeSubsystem *subsys)
 {
 char *subnqn;
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
index 4eba50d96a1d..ccf6a71398d3 100644
--- a/hw/block/nvme-subsys.h
+++ b/hw/block/nvme-subsys.h
@@ -14,6 +14,7 @@
 OBJ

[PATCH V4 5/6] hw/block/nvme: add NMIC enum value for Identify Namespace

2021-01-21 Thread Minwoo Im
Added Namespace Multi-path I/O and Namespace Sharing Capabilities (NMIC)
field to support shared namespace from controller(s).

This field is in Identify Namespace data structure in [30].

Signed-off-by: Minwoo Im 
---
 include/block/nvme.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index d6415a869c1c..ad68cdc2b92d 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1203,6 +1203,10 @@ enum NvmeNsIdentifierType {
 NVME_NIDT_CSI   = 0x04,
 };
 
+enum NvmeIdNsNmic {
+NVME_NMIC_NS_SHARED = 1 << 0,
+};
+
 enum NvmeCsi {
 NVME_CSI_NVM= 0x00,
 NVME_CSI_ZONED  = 0x02,
-- 
2.17.1




[PATCH V4 0/6] hw/block/nvme: support multi-path for ctrl/ns

2021-01-21 Thread Minwoo Im
Hello,

This series is fourth series for the support of NVMe subsystem scheme
with multi-controller and namespace sharing in a subsystem.

This time, I've removed 'detached' scheme introduced in the previous
series out of this series to focus on subsystem scheme in this series.
The attach/detach scheme will be introduced in the next series with
ns-mgmt functionality.

Here's an example of how-to:

  # Specify a subsystem
  -device nvme-subsys,id=subsys0 \
  -device nvme,serial=foo,id=nvme0,subsys=subsys0 \
  -device nvme,serial=bar,id=nvme1,subsys=subsys0 \
  -device nvme,serial=baz,id=nvme2,subsys=subsys0 \
  -device nvme-ns,id=ns1,drive=drv10,nsid=1,subsys=subsys0 \
  -device nvme-ns,id=ns2,drive=drv11,nsid=2,bus=nvme2 \
  \
  # Not specify a subsystem
  -device nvme,serial=qux,id=nvme3 \
  -device nvme-ns,id=ns3,drive=drv12,nsid=3,bus=nvme3 \

# nvme list -v
  NVM Express Subsystems

  SubsystemSubsystem-NQN
Controllers
   

 
  nvme-subsys1 nqn.2019-08.org.qemu:subsys0 
nvme0, nvme1, nvme2
  nvme-subsys3 nqn.2019-08.org.qemu:qux 
nvme3

  NVM Express Controllers

  Device   SN   MN   FR 
  TxPort AddressSubsystemNamespaces  
     
 -- --  
  nvme0foo  QEMU NVMe Ctrl   1.0
  pcie   :00:06.0   nvme-subsys1 nvme1c0n1
  nvme1bar  QEMU NVMe Ctrl   1.0
  pcie   :00:07.0   nvme-subsys1 nvme1c1n1
  nvme2baz  QEMU NVMe Ctrl   1.0
  pcie   :00:08.0   nvme-subsys1 nvme1c2n1, nvme1c2n2
  nvme3qux  QEMU NVMe Ctrl   1.0
  pcie   :00:09.0   nvme-subsys3 nvme3c3n1

  NVM Express Namespaces

  Device   NSID Usage  Format   Controllers 

    --  

  nvme1n1  1134.22  MB / 134.22  MB512   B +  0 B   nvme0, 
nvme1, nvme2
  nvme1n2  2268.44  MB / 268.44  MB512   B +  0 B   nvme2
  nvme3n1  3268.44  MB / 268.44  MB512   B +  0 B   nvme3

Thanks,

Since RFC V3:
  - Exclude 'detached' scheme from this series.  This will be covered in
the next series by covering all the ns-related admin commands
including ZNS and ns-mgmt. (Niklas)
  - Rebased on nvme-next.
  - Remove RFC tag from this V4.

Since RFC V2:
  - Rebased on nvme-next branch with trivial patches from the previous
version(V2) applied. (Klaus)
  - Fix enumeration type name convention with NvmeIdNs prefix. (Klaus)
  - Put 'cntlid' to NvmeCtrl instance in nvme_init_ctrl() which was
missed in V2.
  - Added 'detached' parameter to nvme-ns device to decide whether to
attach or not to controller(s) in the subsystem. (Klaus)
  - Implemented Identify Active Namespace ID List apart from Identify
Allocated Namespace ID List by removing fall-thru statement.

Since RFC V1:
  - Updated namespace sharing scheme to be based on nvme-subsys
hierarchy.

Minwoo Im (6):
  hw/block/nvme: introduce nvme-subsys device
  hw/block/nvme: support to map controller to a subsystem
  hw/block/nvme: add CMIC enum value for Identify Controller
  hw/block/nvme: support for multi-controller in subsystem
  hw/block/nvme: add NMIC enum value for Identify Namespace
  hw/block/nvme: support for shared namespace in subsystem

 hw/block/meson.build   |   2 +-
 hw/block/nvme-ns.c |  23 +++--
 hw/block/nvme-ns.h |   7 +++
 hw/block/nvme-subsys.c | 109 +
 hw/block/nvme-subsys.h |  32 
 hw/block/nvme.c|  77 ++---
 hw/block/nvme.h|   4 ++
 include/block/nvme.h   |   8 +++
 8 files changed, 249 insertions(+), 13 deletions(-)
 create mode 100644 hw/block/nvme-subsys.c
 create mode 100644 hw/block/nvme-subsys.h

-- 
2.17.1




[PATCH V4 2/6] hw/block/nvme: support to map controller to a subsystem

2021-01-21 Thread Minwoo Im
nvme controller(nvme) can be mapped to a NVMe subsystem(nvme-subsys).
This patch maps a controller to a subsystem by adding a parameter
'subsys' to the nvme device.

To map a controller to a subsystem, we need to put nvme-subsys first and
then maps the subsystem to the controller:

  -device nvme-subsys,id=subsys0
  -device nvme,serial=foo,id=nvme0,subsys=subsys0

If 'subsys' property is not given to the nvme controller, then subsystem
NQN will be created with serial (e.g., 'foo' in above example),
Otherwise, it will be based on subsys id (e.g., 'subsys0' in above
example).

Signed-off-by: Minwoo Im 
---
 hw/block/nvme.c | 30 ++
 hw/block/nvme.h |  3 +++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index aabccdf36f4b..ab0531492ddd 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -23,6 +23,7 @@
  *  max_ioqpairs=, \
  *  aerl=, aer_max_queued=, \
  *  mdts=,zoned.append_size_limit= \
+ *  ,subsys= \
  *  -device nvme-ns,drive=,bus=,nsid=,\
  *  zoned=
  *  -device nvme-subsys,id=
@@ -44,6 +45,13 @@
  *
  * nvme device parameters
  * ~~
+ * - `subsys`
+ *   NVM Subsystem device. If given, a subsystem NQN will be initialized with
+ *given. Otherwise,  will be taken for subsystem NQN.
+ *   Also, it will enable multi controller capability represented in Identify
+ *   Controller data structure in CMIC (Controller Multi-path I/O and Namesapce
+ *   Sharing Capabilities), if given.
+ *
  * - `aerl`
  *   The Asynchronous Event Request Limit (AERL). Indicates the maximum number
  *   of concurrently outstanding Asynchronous Event Request commands support
@@ -4404,11 +4412,25 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 return 0;
 }
 
+static void nvme_init_subnqn(NvmeCtrl *n)
+{
+NvmeSubsystem *subsys = n->subsys;
+NvmeIdCtrl *id = >id_ctrl;
+char *subnqn;
+
+if (!subsys) {
+subnqn = g_strdup_printf("nqn.2019-08.org.qemu:%s", n->params.serial);
+strpadcpy((char *)id->subnqn, sizeof(id->subnqn), subnqn, '\0');
+g_free(subnqn);
+} else {
+pstrcpy((char *)id->subnqn, sizeof(id->subnqn), (char*)subsys->subnqn);
+}
+}
+
 static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
 {
 NvmeIdCtrl *id = >id_ctrl;
 uint8_t *pci_conf = pci_dev->config;
-char *subnqn;
 
 id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
 id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
@@ -4455,9 +4477,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
NVME_CTRL_SGLS_BITBUCKET);
 
-subnqn = g_strdup_printf("nqn.2019-08.org.qemu:%s", n->params.serial);
-strpadcpy((char *)id->subnqn, sizeof(id->subnqn), subnqn, '\0');
-g_free(subnqn);
+nvme_init_subnqn(n);
 
 id->psd[0].mp = cpu_to_le16(0x9c4);
 id->psd[0].enlat = cpu_to_le32(0x10);
@@ -4545,6 +4565,8 @@ static Property nvme_props[] = {
 DEFINE_BLOCK_PROPERTIES(NvmeCtrl, namespace.blkconf),
 DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmr.dev, TYPE_MEMORY_BACKEND,
  HostMemoryBackend *),
+DEFINE_PROP_LINK("subsys", NvmeCtrl, subsys, TYPE_NVME_SUBSYS,
+ NvmeSubsystem *),
 DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial),
 DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0),
 DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0),
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index dee6092bd45f..04d4684601fd 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -2,6 +2,7 @@
 #define HW_NVME_H
 
 #include "block/nvme.h"
+#include "nvme-subsys.h"
 #include "nvme-ns.h"
 
 #define NVME_MAX_NAMESPACES 256
@@ -170,6 +171,8 @@ typedef struct NvmeCtrl {
 
 uint8_t zasl;
 
+NvmeSubsystem   *subsys;
+
 NvmeNamespace   namespace;
 NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES];
 NvmeSQueue  **sq;
-- 
2.17.1




[PATCH V4 1/6] hw/block/nvme: introduce nvme-subsys device

2021-01-21 Thread Minwoo Im
To support multi-path in QEMU NVMe device model, we need to have NVMe
subsystem hierarchy to map controllers and namespaces to a NVMe
subsystem.

This patch introduced a simple nvme-subsys device model.  The subsystem
will be prepared with subsystem NQN with  provided in
nvme-subsys device:

  ex) -device nvme-subsys,id=subsys0: nqn.2019-08.org.qemu:subsys0

Signed-off-by: Minwoo Im 
---
 hw/block/meson.build   |  2 +-
 hw/block/nvme-subsys.c | 63 ++
 hw/block/nvme-subsys.h | 25 +
 hw/block/nvme.c|  3 ++
 4 files changed, 92 insertions(+), 1 deletion(-)
 create mode 100644 hw/block/nvme-subsys.c
 create mode 100644 hw/block/nvme-subsys.h

diff --git a/hw/block/meson.build b/hw/block/meson.build
index 602ca6c8541d..83ea2d37978d 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -13,7 +13,7 @@ softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: 
files('m25p80.c'))
 softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c'))
 softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
 softmmu_ss.add(when: 'CONFIG_SH4', if_true: files('tc58128.c'))
-softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c'))
+softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 
'nvme-subsys.c'))
 
 specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
 specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: 
files('vhost-user-blk.c'))
diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c
new file mode 100644
index ..f1dc71d588d9
--- /dev/null
+++ b/hw/block/nvme-subsys.c
@@ -0,0 +1,63 @@
+/*
+ * QEMU NVM Express Subsystem: nvme-subsys
+ *
+ * Copyright (c) 2021 Minwoo Im 
+ *
+ * This code is licensed under the GNU GPL v2.  Refer COPYING.
+ */
+
+#include "qemu/units.h"
+#include "qemu/osdep.h"
+#include "qemu/uuid.h"
+#include "qemu/iov.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-core.h"
+#include "hw/block/block.h"
+#include "block/aio.h"
+#include "block/accounting.h"
+#include "sysemu/sysemu.h"
+#include "hw/pci/pci.h"
+#include "nvme.h"
+#include "nvme-subsys.h"
+
+static void nvme_subsys_setup(NvmeSubsystem *subsys)
+{
+char *subnqn;
+
+subnqn = g_strdup_printf("nqn.2019-08.org.qemu:%s", subsys->parent_obj.id);
+strpadcpy((char *)subsys->subnqn, sizeof(subsys->subnqn), subnqn, '\0');
+g_free(subnqn);
+}
+
+static void nvme_subsys_realize(DeviceState *dev, Error **errp)
+{
+NvmeSubsystem *subsys = NVME_SUBSYS(dev);
+
+nvme_subsys_setup(subsys);
+}
+
+static void nvme_subsys_class_init(ObjectClass *oc, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(oc);
+
+set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+
+dc->realize = nvme_subsys_realize;
+dc->desc = "Virtual NVMe subsystem";
+}
+
+static const TypeInfo nvme_subsys_info = {
+.name = TYPE_NVME_SUBSYS,
+.parent = TYPE_DEVICE,
+.class_init = nvme_subsys_class_init,
+.instance_size = sizeof(NvmeSubsystem),
+};
+
+static void nvme_subsys_register_types(void)
+{
+type_register_static(_subsys_info);
+}
+
+type_init(nvme_subsys_register_types)
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
new file mode 100644
index ..40f06a4c7db0
--- /dev/null
+++ b/hw/block/nvme-subsys.h
@@ -0,0 +1,25 @@
+/*
+ * QEMU NVM Express Subsystem: nvme-subsys
+ *
+ * Copyright (c) 2021 Minwoo Im 
+ *
+ * This code is licensed under the GNU GPL v2.  Refer COPYING.
+ */
+
+#ifndef NVME_SUBSYS_H
+#define NVME_SUBSYS_H
+
+#define TYPE_NVME_SUBSYS "nvme-subsys"
+#define NVME_SUBSYS(obj) \
+OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
+
+#define NVME_SUBSYS_MAX_CTRLS   32
+
+typedef struct NvmeCtrl NvmeCtrl;
+typedef struct NvmeNamespace NvmeNamespace;
+typedef struct NvmeSubsystem {
+DeviceState parent_obj;
+uint8_t subnqn[256];
+} NvmeSubsystem;
+
+#endif /* NVME_SUBSYS_H */
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 21aec90637fa..aabccdf36f4b 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -25,6 +25,7 @@
  *  mdts=,zoned.append_size_limit= \
  *  -device nvme-ns,drive=,bus=,nsid=,\
  *  zoned=
+ *  -device nvme-subsys,id=
  *
  * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
  * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. By default, the
@@ -38,6 +39,8 @@
  *
  * The PMR will use BAR 4/5 exclusively.
  *
+ * To place controller(s) and namespace(s) to a subsystem, then provide
+ * nvme-subsys device as above.
  *
  * nvme device parameters
  * ~~
-- 
2.17.1




Re: [PATCH V4 4/6] hw/block/nvme: support for multi-controller in subsystem

2021-01-21 Thread Minwoo Im
On 21-01-21 15:17:16, Keith Busch wrote:
> On Fri, Jan 22, 2021 at 07:09:06AM +0900, Minwoo Im wrote:
> > -static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
> > +static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t 
> > cntlid)
> >  {
> >  NvmeIdCtrl *id = >id_ctrl;
> >  uint8_t *pci_conf = pci_dev->config;
> >  
> > +n->cntlid = cntlid;
> 
> I don't think 'cntlid' is important enough to be a function parameter.
> You can just set it within the 'NvmeCtrl' struct before calling this
> function like all the other properties.

Okay.  Rather than adding a parameter to this function,
nvme_init_subsys() may take this job to assign cntlid to the controller
instance first.  Let me fix one!

> > @@ -4517,7 +4543,11 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
> > **errp)
> >  return;
> >  }
> >  
> > -nvme_init_ctrl(n, pci_dev);
> > +cntlid = nvme_init_subsys(n, errp);
> > +if (cntlid < 0) {
> 
> error_propogate();

Thanks for catching this.



[PATCH V5 2/6] hw/block/nvme: support to map controller to a subsystem

2021-01-22 Thread Minwoo Im
nvme controller(nvme) can be mapped to a NVMe subsystem(nvme-subsys).
This patch maps a controller to a subsystem by adding a parameter
'subsys' to the nvme device.

To map a controller to a subsystem, we need to put nvme-subsys first and
then maps the subsystem to the controller:

  -device nvme-subsys,id=subsys0
  -device nvme,serial=foo,id=nvme0,subsys=subsys0

If 'subsys' property is not given to the nvme controller, then subsystem
NQN will be created with serial (e.g., 'foo' in above example),
Otherwise, it will be based on subsys id (e.g., 'subsys0' in above
example).

Signed-off-by: Minwoo Im 
---
 hw/block/nvme.c | 30 +-
 hw/block/nvme.h |  3 +++
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index aabccdf36f4b..b525fca14103 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -22,7 +22,8 @@
  *  [pmrdev=,] \
  *  max_ioqpairs=, \
  *  aerl=, aer_max_queued=, \
- *  mdts=,zoned.append_size_limit= \
+ *  mdts=,zoned.append_size_limit=, \
+ *  subsys= \
  *  -device nvme-ns,drive=,bus=,nsid=,\
  *  zoned=
  *  -device nvme-subsys,id=
@@ -44,6 +45,13 @@
  *
  * nvme device parameters
  * ~~
+ * - `subsys`
+ *   NVM Subsystem device. If given, a subsystem NQN will be initialized with
+ *given. Otherwise,  will be taken for subsystem NQN.
+ *   Also, it will enable multi controller capability represented in Identify
+ *   Controller data structure in CMIC (Controller Multi-path I/O and Namesapce
+ *   Sharing Capabilities), if given.
+ *
  * - `aerl`
  *   The Asynchronous Event Request Limit (AERL). Indicates the maximum number
  *   of concurrently outstanding Asynchronous Event Request commands support
@@ -4404,11 +4412,23 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 return 0;
 }
 
+static void nvme_init_subnqn(NvmeCtrl *n)
+{
+NvmeSubsystem *subsys = n->subsys;
+NvmeIdCtrl *id = >id_ctrl;
+
+if (!subsys) {
+snprintf((char *)id->subnqn, sizeof(id->subnqn),
+ "nqn.2019-08.org.qemu:%s", n->params.serial);
+} else {
+pstrcpy((char *)id->subnqn, sizeof(id->subnqn), (char*)subsys->subnqn);
+}
+}
+
 static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
 {
 NvmeIdCtrl *id = >id_ctrl;
 uint8_t *pci_conf = pci_dev->config;
-char *subnqn;
 
 id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
 id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
@@ -4455,9 +4475,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
NVME_CTRL_SGLS_BITBUCKET);
 
-subnqn = g_strdup_printf("nqn.2019-08.org.qemu:%s", n->params.serial);
-strpadcpy((char *)id->subnqn, sizeof(id->subnqn), subnqn, '\0');
-g_free(subnqn);
+nvme_init_subnqn(n);
 
 id->psd[0].mp = cpu_to_le16(0x9c4);
 id->psd[0].enlat = cpu_to_le32(0x10);
@@ -4545,6 +4563,8 @@ static Property nvme_props[] = {
 DEFINE_BLOCK_PROPERTIES(NvmeCtrl, namespace.blkconf),
 DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmr.dev, TYPE_MEMORY_BACKEND,
  HostMemoryBackend *),
+DEFINE_PROP_LINK("subsys", NvmeCtrl, subsys, TYPE_NVME_SUBSYS,
+ NvmeSubsystem *),
 DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial),
 DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0),
 DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0),
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index dee6092bd45f..04d4684601fd 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -2,6 +2,7 @@
 #define HW_NVME_H
 
 #include "block/nvme.h"
+#include "nvme-subsys.h"
 #include "nvme-ns.h"
 
 #define NVME_MAX_NAMESPACES 256
@@ -170,6 +171,8 @@ typedef struct NvmeCtrl {
 
 uint8_t zasl;
 
+NvmeSubsystem   *subsys;
+
 NvmeNamespace   namespace;
 NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES];
 NvmeSQueue  **sq;
-- 
2.17.1




[PATCH V5 1/6] hw/block/nvme: introduce nvme-subsys device

2021-01-22 Thread Minwoo Im
To support multi-path in QEMU NVMe device model, we need to have NVMe
subsystem hierarchy to map controllers and namespaces to a NVMe
subsystem.

This patch introduced a simple nvme-subsys device model.  The subsystem
will be prepared with subsystem NQN with  provided in
nvme-subsys device:

  ex) -device nvme-subsys,id=subsys0: nqn.2019-08.org.qemu:subsys0

Signed-off-by: Minwoo Im 
---
 hw/block/meson.build   |  2 +-
 hw/block/nvme-subsys.c | 60 ++
 hw/block/nvme-subsys.h | 25 ++
 hw/block/nvme.c|  3 +++
 4 files changed, 89 insertions(+), 1 deletion(-)
 create mode 100644 hw/block/nvme-subsys.c
 create mode 100644 hw/block/nvme-subsys.h

diff --git a/hw/block/meson.build b/hw/block/meson.build
index 602ca6c8541d..83ea2d37978d 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -13,7 +13,7 @@ softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: 
files('m25p80.c'))
 softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c'))
 softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
 softmmu_ss.add(when: 'CONFIG_SH4', if_true: files('tc58128.c'))
-softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c'))
+softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 
'nvme-subsys.c'))
 
 specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
 specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: 
files('vhost-user-blk.c'))
diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c
new file mode 100644
index ..aa82911b951c
--- /dev/null
+++ b/hw/block/nvme-subsys.c
@@ -0,0 +1,60 @@
+/*
+ * QEMU NVM Express Subsystem: nvme-subsys
+ *
+ * Copyright (c) 2021 Minwoo Im 
+ *
+ * This code is licensed under the GNU GPL v2.  Refer COPYING.
+ */
+
+#include "qemu/units.h"
+#include "qemu/osdep.h"
+#include "qemu/uuid.h"
+#include "qemu/iov.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-core.h"
+#include "hw/block/block.h"
+#include "block/aio.h"
+#include "block/accounting.h"
+#include "sysemu/sysemu.h"
+#include "hw/pci/pci.h"
+#include "nvme.h"
+#include "nvme-subsys.h"
+
+static void nvme_subsys_setup(NvmeSubsystem *subsys)
+{
+snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn),
+ "nqn.2019-08.org.qemu:%s", subsys->parent_obj.id);
+}
+
+static void nvme_subsys_realize(DeviceState *dev, Error **errp)
+{
+NvmeSubsystem *subsys = NVME_SUBSYS(dev);
+
+nvme_subsys_setup(subsys);
+}
+
+static void nvme_subsys_class_init(ObjectClass *oc, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(oc);
+
+set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+
+dc->realize = nvme_subsys_realize;
+dc->desc = "Virtual NVMe subsystem";
+}
+
+static const TypeInfo nvme_subsys_info = {
+.name = TYPE_NVME_SUBSYS,
+.parent = TYPE_DEVICE,
+.class_init = nvme_subsys_class_init,
+.instance_size = sizeof(NvmeSubsystem),
+};
+
+static void nvme_subsys_register_types(void)
+{
+type_register_static(_subsys_info);
+}
+
+type_init(nvme_subsys_register_types)
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
new file mode 100644
index ..40f06a4c7db0
--- /dev/null
+++ b/hw/block/nvme-subsys.h
@@ -0,0 +1,25 @@
+/*
+ * QEMU NVM Express Subsystem: nvme-subsys
+ *
+ * Copyright (c) 2021 Minwoo Im 
+ *
+ * This code is licensed under the GNU GPL v2.  Refer COPYING.
+ */
+
+#ifndef NVME_SUBSYS_H
+#define NVME_SUBSYS_H
+
+#define TYPE_NVME_SUBSYS "nvme-subsys"
+#define NVME_SUBSYS(obj) \
+OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
+
+#define NVME_SUBSYS_MAX_CTRLS   32
+
+typedef struct NvmeCtrl NvmeCtrl;
+typedef struct NvmeNamespace NvmeNamespace;
+typedef struct NvmeSubsystem {
+DeviceState parent_obj;
+uint8_t subnqn[256];
+} NvmeSubsystem;
+
+#endif /* NVME_SUBSYS_H */
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 21aec90637fa..aabccdf36f4b 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -25,6 +25,7 @@
  *  mdts=,zoned.append_size_limit= \
  *  -device nvme-ns,drive=,bus=,nsid=,\
  *  zoned=
+ *  -device nvme-subsys,id=
  *
  * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
  * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. By default, the
@@ -38,6 +39,8 @@
  *
  * The PMR will use BAR 4/5 exclusively.
  *
+ * To place controller(s) and namespace(s) to a subsystem, then provide
+ * nvme-subsys device as above.
  *
  * nvme device parameters
  * ~~
-- 
2.17.1




[PATCH V5 0/6] hw/block/nvme: support multi-path for ctrl/ns

2021-01-22 Thread Minwoo Im
Hello,

Here's fifth patch series for the support of NVMe subsystem scheme with
multi-controller and namespace sharing in a subsystem.

This series has applied review comments from the previous series,
mostly from Keith's review.  Thanks Keith!

Here's test result with a simple 'nvme list -v' command from this model
with adding a ZNS example with subsys.

  -device nvme-subsys,id=subsys0 \
  -device nvme,serial=foo,id=nvme0,subsys=subsys0 \
  -device nvme,serial=bar,id=nvme1,subsys=subsys0 \
  -device nvme,serial=baz,id=nvme2,subsys=subsys0 \
  -device nvme-ns,id=ns1,drive=drv10,nsid=1,subsys=subsys0 \
  -device nvme-ns,id=ns2,drive=drv11,nsid=2,bus=nvme2 \
  \
  -device nvme,serial=qux,id=nvme3 \
  -device nvme-ns,id=ns3,drive=drv12,nsid=3,bus=nvme3 \
  \
  -device nvme-subsys,id=subsys1 \
  -device nvme,serial=quux,id=nvme4,subsys=subsys1 \
  -device nvme-ns,id=ns4,drive=drv13,nsid=1,subsys=subsys1,zoned=true \

  root@vm:~/work# nvme list -v
  NVM Express Subsystems

  SubsystemSubsystem-NQN
Controllers
   

 
  nvme-subsys1 nqn.2019-08.org.qemu:subsys0 
nvme0, nvme1, nvme2
  nvme-subsys3 nqn.2019-08.org.qemu:qux 
nvme3
  nvme-subsys4 nqn.2019-08.org.qemu:subsys1 
nvme4

  NVM Express Controllers

  Device   SN   MN   FR 
  TxPort AddressSubsystemNamespaces
     
 -- --  
  nvme0foo  QEMU NVMe Ctrl   1.0
  pcie   :00:06.0   nvme-subsys1 nvme1c0n1
  nvme1bar  QEMU NVMe Ctrl   1.0
  pcie   :00:07.0   nvme-subsys1 nvme1c1n1
  nvme2baz  QEMU NVMe Ctrl   1.0
  pcie   :00:08.0   nvme-subsys1 nvme1c2n1, nvme1c2n2
  nvme3qux  QEMU NVMe Ctrl   1.0
  pcie   :00:09.0   nvme-subsys3 nvme3n1
  nvme4quux QEMU NVMe Ctrl   1.0
  pcie   :00:0a.0   nvme-subsys4 nvme4c4n1

  NVM Express Namespaces

  Device   NSID Usage  Format   Controllers
    --  

  nvme1n1  1134.22  MB / 134.22  MB512   B +  0 B   nvme0, 
nvme1, nvme2
  nvme1n2  2268.44  MB / 268.44  MB512   B +  0 B   nvme2
  nvme3n1  3268.44  MB / 268.44  MB512   B +  0 B   nvme3
  nvme4n1  1268.44  MB / 268.44  MB512   B +  0 B   nvme4

Thanks,

Since V4:
  - Code clean-up to snprintf rather than duplicating it and copy.
(Keith)
  - Documentation for 'subsys' clean-up.  (Keith)
  - Remove 'cntlid' param from nvme_init_ctrl().  (Keith)
  - Put error_propagate() in nvme_realize().  (Keith)

Since RFC V3:
  - Exclude 'detached' scheme from this series.  This will be covered in
the next series by covering all the ns-related admin commands
including ZNS and ns-mgmt. (Niklas)
  - Rebased on nvme-next.
  - Remove RFC tag from this V4.

Since RFC V2:
  - Rebased on nvme-next branch with trivial patches from the previous
version(V2) applied. (Klaus)
  - Fix enumeration type name convention with NvmeIdNs prefix. (Klaus)
  - Put 'cntlid' to NvmeCtrl instance in nvme_init_ctrl() which was
missed in V2.
  - Added 'detached' parameter to nvme-ns device to decide whether to
attach or not to controller(s) in the subsystem. (Klaus)
  - Implemented Identify Active Namespace ID List apart from Identify
Allocated Namespace ID List by removing fall-thru statement.

Since RFC V1:
  - Updated namespace sharing scheme to be based on nvme-subsys
hierarchy.

Minwoo Im (6):
  hw/block/nvme: introduce nvme-subsys device
  hw/block/nvme: support to map controller to a subsystem
  hw/block/nvme: add CMIC enum value for Identify Controller
  hw/block/nvme: support for multi-controller in subsystem
  hw/block/nvme: add NMIC enum value for Identify Namespace
  hw/block/nvme: support for shared namespace in subsystem

 hw/block/meson.build   |   2 +-
 hw/block/nvme-ns.c |  23 +++--
 hw/block/nvme-ns.h |   7 +++
 hw/block/nvme-subsys.c | 106 +
 hw/block/nvme-subsys.h |  32 +
 hw/block/nvme.c|  72 +---
 hw/block/nvme.h|   4 ++
 include/block/nvme.h   |   8 
 8 files changed, 242 insertions(+), 12 deletions(-)
 create

[PATCH V5 4/6] hw/block/nvme: support for multi-controller in subsystem

2021-01-22 Thread Minwoo Im
We have nvme-subsys and nvme devices mapped together.  To support
multi-controller scheme to this setup, controller identifier(id) has to
be managed.  Earlier, cntlid(controller id) used to be always 0 because
we didn't have any subsystem scheme that controller id matters.

This patch introduced 'cntlid' attribute to the nvme controller
instance(NvmeCtrl) and make it allocated by the nvme-subsys device
mapped to the controller.  If nvme-subsys is not given to the
controller, then it will always be 0 as it was.

Added 'ctrls' array in the nvme-subsys instance to manage attached
controllers to the subsystem with a limit(32).  This patch didn't take
list for the controllers to make it seamless with nvme-ns device.

Signed-off-by: Minwoo Im 
---
 hw/block/nvme-subsys.c | 21 +
 hw/block/nvme-subsys.h |  4 
 hw/block/nvme.c| 29 +
 hw/block/nvme.h|  1 +
 4 files changed, 55 insertions(+)

diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c
index aa82911b951c..e9d61c993c90 100644
--- a/hw/block/nvme-subsys.c
+++ b/hw/block/nvme-subsys.c
@@ -22,6 +22,27 @@
 #include "nvme.h"
 #include "nvme-subsys.h"
 
+int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
+{
+NvmeSubsystem *subsys = n->subsys;
+int cntlid;
+
+for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) {
+if (!subsys->ctrls[cntlid]) {
+break;
+}
+}
+
+if (cntlid == ARRAY_SIZE(subsys->ctrls)) {
+error_setg(errp, "no more free controller id");
+return -1;
+}
+
+subsys->ctrls[cntlid] = n;
+
+return cntlid;
+}
+
 static void nvme_subsys_setup(NvmeSubsystem *subsys)
 {
 snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn),
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
index 40f06a4c7db0..4eba50d96a1d 100644
--- a/hw/block/nvme-subsys.h
+++ b/hw/block/nvme-subsys.h
@@ -20,6 +20,10 @@ typedef struct NvmeNamespace NvmeNamespace;
 typedef struct NvmeSubsystem {
 DeviceState parent_obj;
 uint8_t subnqn[256];
+
+NvmeCtrl*ctrls[NVME_SUBSYS_MAX_CTRLS];
 } NvmeSubsystem;
 
+int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
+
 #endif /* NVME_SUBSYS_H */
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index b525fca14103..3dedefb8ebba 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -4435,6 +4435,9 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
 strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
 strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
+
+id->cntlid = n->cntlid;
+
 id->rab = 6;
 id->ieee[0] = 0x00;
 id->ieee[1] = 0x02;
@@ -4481,6 +4484,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->psd[0].enlat = cpu_to_le32(0x10);
 id->psd[0].exlat = cpu_to_le32(0x4);
 
+if (n->subsys) {
+id->cmic |= NVME_CMIC_MULTI_CTRL;
+}
+
 NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
 NVME_CAP_SET_CQR(n->bar.cap, 1);
 NVME_CAP_SET_TO(n->bar.cap, 0xf);
@@ -4495,6 +4502,24 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 n->bar.intmc = n->bar.intms = 0;
 }
 
+static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
+{
+int cntlid;
+
+if (!n->subsys) {
+return 0;
+}
+
+cntlid = nvme_subsys_register_ctrl(n, errp);
+if (cntlid < 0) {
+return -1;
+}
+
+n->cntlid = cntlid;
+
+return 0;
+}
+
 static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 {
 NvmeCtrl *n = NVME(pci_dev);
@@ -4515,6 +4540,10 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
**errp)
 return;
 }
 
+if (nvme_init_subsys(n, errp)) {
+error_propagate(errp, local_err);
+return;
+}
 nvme_init_ctrl(n, pci_dev);
 
 /* setup a namespace if the controller drive property was given */
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 04d4684601fd..b8f5f2d6ffb8 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -134,6 +134,7 @@ typedef struct NvmeCtrl {
 NvmeBus  bus;
 BlockConfconf;
 
+uint16_tcntlid;
 boolqs_created;
 uint32_tpage_size;
 uint16_tpage_bits;
-- 
2.17.1




[PATCH V5 5/6] hw/block/nvme: add NMIC enum value for Identify Namespace

2021-01-22 Thread Minwoo Im
Added Namespace Multi-path I/O and Namespace Sharing Capabilities (NMIC)
field to support shared namespace from controller(s).

This field is in Identify Namespace data structure in [30].

Signed-off-by: Minwoo Im 
---
 include/block/nvme.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index d6415a869c1c..ad68cdc2b92d 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1203,6 +1203,10 @@ enum NvmeNsIdentifierType {
 NVME_NIDT_CSI   = 0x04,
 };
 
+enum NvmeIdNsNmic {
+NVME_NMIC_NS_SHARED = 1 << 0,
+};
+
 enum NvmeCsi {
 NVME_CSI_NVM= 0x00,
 NVME_CSI_ZONED  = 0x02,
-- 
2.17.1




[PATCH V5 3/6] hw/block/nvme: add CMIC enum value for Identify Controller

2021-01-22 Thread Minwoo Im
Added Controller Multi-path I/O and Namespace Sharing Capabilities
(CMIC) field to support multi-controller in the following patches.

This field is in Identify Controller data structure in [76].

Signed-off-by: Minwoo Im 
---
 include/block/nvme.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index e4b918064df9..d6415a869c1c 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1034,6 +1034,10 @@ enum NvmeIdCtrlLpa {
 NVME_LPA_EXTENDED = 1 << 2,
 };
 
+enum NvmeIdCtrlCmic {
+NVME_CMIC_MULTI_CTRL= 1 << 1,
+};
+
 #define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf)
 #define NVME_CTRL_SQES_MAX(sqes) (((sqes) >> 4) & 0xf)
 #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf)
-- 
2.17.1




[PATCH V5 6/6] hw/block/nvme: support for shared namespace in subsystem

2021-01-22 Thread Minwoo Im
nvme-ns device is registered to a nvme controller device during the
initialization in nvme_register_namespace() in case that 'bus' property
is given which means it's mapped to a single controller.

This patch introduced a new property 'subsys' just like the controller
device instance did to map a namespace to a NVMe subsystem.

If 'subsys' property is given to the nvme-ns device, it will belong to
the specified subsystem and will be attached to all controllers in that
subsystem by enabling shared namespace capability in NMIC(Namespace
Multi-path I/O and Namespace Capabilities) in Identify Namespace.

Usage:

  -device nvme-subsys,id=subsys0
  -device nvme,serial=foo,id=nvme0,subsys=subsys0
  -device nvme,serial=bar,id=nvme1,subsys=subsys0
  -device nvme,serial=baz,id=nvme2,subsys=subsys0
  -device nvme-ns,id=ns1,drive=,nsid=1,subsys=subsys0  # Shared
  -device nvme-ns,id=ns2,drive=,nsid=2,bus=nvme2   # Non-shared

  In the above example, 'ns1' will be shared to 'nvme0' and 'nvme1' in
  the same subsystem.  On the other hand, 'ns2' will be attached to the
  'nvme2' only as a private namespace in that subsystem.

All the namespace with 'subsys' parameter will attach all controllers in
the subsystem to the namespace by default.

Signed-off-by: Minwoo Im 
---
 hw/block/nvme-ns.c | 23 ++-
 hw/block/nvme-ns.h |  7 +++
 hw/block/nvme-subsys.c | 25 +
 hw/block/nvme-subsys.h |  3 +++
 hw/block/nvme.c| 10 +-
 5 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 62b25cf69bfa..9b493f2ead03 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -63,6 +63,10 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
 
 id_ns->npda = id_ns->npdg = npdg - 1;
 
+if (nvme_ns_shared(ns)) {
+id_ns->nmic |= NVME_NMIC_NS_SHARED;
+}
+
 return 0;
 }
 
@@ -365,16 +369,25 @@ static void nvme_ns_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
-if (nvme_register_namespace(n, ns, errp)) {
-error_propagate_prepend(errp, local_err,
-"could not register namespace: ");
-return;
+if (ns->subsys) {
+if (nvme_subsys_register_ns(ns, errp)) {
+error_propagate_prepend(errp, local_err,
+"could not setup namespace to subsys: ");
+return;
+}
+} else {
+if (nvme_register_namespace(n, ns, errp)) {
+error_propagate_prepend(errp, local_err,
+"could not register namespace: ");
+return;
+}
 }
-
 }
 
 static Property nvme_ns_props[] = {
 DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
+DEFINE_PROP_LINK("subsys", NvmeNamespace, subsys, TYPE_NVME_SUBSYS,
+ NvmeSubsystem *),
 DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
 DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
 DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false),
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 293ac990e3f6..929e78861903 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -47,6 +47,8 @@ typedef struct NvmeNamespace {
 const uint32_t *iocs;
 uint8_t  csi;
 
+NvmeSubsystem   *subsys;
+
 NvmeIdNsZoned   *id_ns_zoned;
 NvmeZone*zone_array;
 QTAILQ_HEAD(, NvmeZone) exp_open_zones;
@@ -77,6 +79,11 @@ static inline uint32_t nvme_nsid(NvmeNamespace *ns)
 return -1;
 }
 
+static inline bool nvme_ns_shared(NvmeNamespace *ns)
+{
+return !!ns->subsys;
+}
+
 static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns)
 {
 NvmeIdNs *id_ns = >id_ns;
diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c
index e9d61c993c90..641de33e99fc 100644
--- a/hw/block/nvme-subsys.c
+++ b/hw/block/nvme-subsys.c
@@ -43,6 +43,31 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
 return cntlid;
 }
 
+int nvme_subsys_register_ns(NvmeNamespace *ns, Error **errp)
+{
+NvmeSubsystem *subsys = ns->subsys;
+NvmeCtrl *n;
+int i;
+
+if (subsys->namespaces[nvme_nsid(ns)]) {
+error_setg(errp, "namespace %d already registerd to subsy %s",
+   nvme_nsid(ns), subsys->parent_obj.id);
+return -1;
+}
+
+subsys->namespaces[nvme_nsid(ns)] = ns;
+
+for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
+n = subsys->ctrls[i];
+
+if (n && nvme_register_namespace(n, ns, errp)) {
+return -1;
+}
+}
+
+return 0;
+}
+
 static void nvme_subsys_setup(NvmeSubsystem *subsys)
 {
 snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn),
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
index 4eba50d96a1d..ccf6a71398d3 100644
--- a/hw/block/nvme-subsys.h
+++ 

[RFC PATCH V3 0/8] hw/block/nvme: support multi-path for ctrl/ns

2021-01-19 Thread Minwoo Im
Hello,

This patch series is third one to support multi-controller and namespace
sharing in multi-path.  This series introduced subsystem scheme to
manage controller(s) and namespace(s) in the subsystem.

This series has new patches from the V2:  'detached' parameter has been
added to the nvme-ns device.  This will decide whether to attach the
namespace to controller(s) in the current subsystem or not.  If it's
given with true, then it will be just allocated in the subsystem, but
not attaching to any controllers in the subsystem.  Otherwise, it will
automatically attach to all the controllers in the subsystem.  The other
thing is that the last patch implemented Identify Active Namespace ID
List command handler apart from the Allocated Namespace ID List.

Run with:
  -device nvme,serial=qux,id=nvme3
  -device nvme-ns,id=ns3,drive=drv12,nsid=3,bus=nvme3

  -device nvme-subsys,id=subsys0
  -device nvme,serial=foo,id=nvme0,subsys=subsys0
  -device nvme,serial=bar,id=nvme1,subsys=subsys0
  -device nvme,serial=baz,id=nvme2,subsys=subsys0
  -device nvme-ns,id=ns1,drive=drv10,nsid=1,subsys=subsys0,detached=true
  -device nvme-ns,id=ns2,drive=drv11,nsid=2,bus=nvme2

nvme-cli:
  root@vm:~/work# nvme list -v  

  NVM Express Subsystems
 

 
  SubsystemSubsystem-NQN
Controllers
   

 
  nvme-subsys0 nqn.2019-08.org.qemu:qux 
nvme0
  nvme-subsys1 nqn.2019-08.org.qemu:subsys0 
nvme1, nvme2, nvme3

   
  NVM Express Controllers   


  
  Device   SN   MN   FR 
  TxPort AddressSubsystemNamespaces
     
 -- --  
  nvme0qux  QEMU NVMe Ctrl   1.0
  pcie   :00:06.0   nvme-subsys0
  nvme1foo  QEMU NVMe Ctrl   1.0
  pcie   :00:07.0   nvme-subsys1
  nvme2bar  QEMU NVMe Ctrl   1.0
  pcie   :00:08.0   nvme-subsys1
  nvme3baz  QEMU NVMe Ctrl   1.0
  pcie   :00:09.0   nvme-subsys1 nvme1n1

  
  NVM Express Namespaces 
   
  Device   NSID Usage  Format   Controllers
    --  

  nvme0n1  3268.44  MB / 268.44  MB512   B +  0 B   nvme0
  nvme1n1  2268.44  MB / 268.44  MB512   B +  0 B   nvme3

Now we have [allocated|not-allocated]/[attached/detached] scheme for
namespaces and controllers.  The next series is going to be to support
namespace management and attachment with few Identify command handlers.

Please review.

Thanks!

Since RFC V2:
  - Rebased on nvme-next branch with trivial patches from the previous
version(V2) applied. (Klaus)
  - Fix enumeration type name convention with NvmeIdNs prefix. (Klaus)
  - Put 'cntlid' to NvmeCtrl instance in nvme_init_ctrl() which was
missed in V2.
  - Added 'detached' parameter to nvme-ns device to decide whether to
attach or not to controller(s) in the subsystem. (Klaus)
  - Implemented Identify Active Namespace ID List apart from Identify
Allocated Namespace ID List by removing fall-thru statement.

Since RFC V1:
  - Updated namespace sharing scheme to be based on nvme-subsys
hierarchy.

Minwoo Im (8):
  hw/block/nvme: introduce nvme-subsys device
  hw/block/nvme: support to map controller to a subsystem
  hw/block/nvme: add CMIC enum value for Identify Controller
  hw/block/nvme: support for multi-controller in subsystem
  hw/block/nvme: add NMIC enum value for Identify Namespace
  hw/block/nvme: support for shared namespace in subsystem
  hw

[RFC PATCH V3 7/8] hw/block/nvme: add 'detached' param not to attach namespace

2021-01-19 Thread Minwoo Im
Introduced 'detached' parameter to nvme-ns device.  If given, the
namespace will not be attached to controller(s) in the subsystem.  If
'subsys' is not given with this option, it should be provided with 'bus'
which is for private namespace.

This patch also introduced 'ctrls_bitmap' in NvmeNamespace instance to
represent which controller id (cntlid) is attached to this namespace
device.  A namespace can be attached to multiple controllers in a
subsystem so that this bitmap maps those two relationships.

The ctrls_bitmap bitmap should not be accessed directly, but through the
helpers introduced in this patch: nvme_ns_is_attached(),
nvme_ns_attach(), nvme_ns_detach().

Note that this patch made identify namespace list data not hold
non-attached namespace ID in nvme_identify_nslist.  Currently, this
command handler is for CNS 0x2(Active) and 0x10(Allocated) both.  The
next patch will introduce a handler for later on.

Signed-off-by: Minwoo Im 
---
 hw/block/nvme-ns.c |  9 +
 hw/block/nvme-ns.h |  6 ++
 hw/block/nvme-subsys.c |  2 ++
 hw/block/nvme.c| 31 ++-
 hw/block/nvme.h| 15 +++
 5 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 073f65e49cac..70d42c24065c 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -17,6 +17,7 @@
 #include "qemu/cutils.h"
 #include "qemu/log.h"
 #include "qemu/error-report.h"
+#include "qemu/hbitmap.h"
 #include "hw/block/block.h"
 #include "hw/pci/pci.h"
 #include "sysemu/sysemu.h"
@@ -304,6 +305,11 @@ static int nvme_ns_check_constraints(NvmeNamespace *ns, 
Error **errp)
 return 0;
 }
 
+static void nvme_ns_init_state(NvmeNamespace *ns)
+{
+ns->ctrls_bitmap = hbitmap_alloc(NVME_SUBSYS_MAX_CTRLS, 0);
+}
+
 int nvme_ns_setup(NvmeNamespace *ns, Error **errp)
 {
 if (nvme_ns_check_constraints(ns, errp)) {
@@ -314,6 +320,8 @@ int nvme_ns_setup(NvmeNamespace *ns, Error **errp)
 return -1;
 }
 
+nvme_ns_init_state(ns);
+
 if (nvme_ns_init(ns, errp)) {
 return -1;
 }
@@ -381,6 +389,7 @@ static Property nvme_ns_props[] = {
 DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
 DEFINE_PROP_LINK("subsys", NvmeNamespace, subsys, TYPE_NVME_SUBSYS,
  NvmeSubsystem *),
+DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false),
 DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
 DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
 DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false),
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 929e78861903..ad2f55931d1b 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -26,6 +26,7 @@ typedef struct NvmeZone {
 } NvmeZone;
 
 typedef struct NvmeNamespaceParams {
+bool detached;
 uint32_t nsid;
 QemuUUID uuid;
 
@@ -48,6 +49,11 @@ typedef struct NvmeNamespace {
 uint8_t  csi;
 
 NvmeSubsystem   *subsys;
+/*
+ * Whether this namespace is attached to a controller or not.  This bitmap
+ * is based on controller id.  This is valid only in case 'subsys' != NULL.
+ */
+HBitmap *ctrls_bitmap;
 
 NvmeIdNsZoned   *id_ns_zoned;
 NvmeZone*zone_array;
diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c
index e7efdcae7d0d..32ad8ef2825a 100644
--- a/hw/block/nvme-subsys.c
+++ b/hw/block/nvme-subsys.c
@@ -11,6 +11,7 @@
 #include "qemu/uuid.h"
 #include "qemu/iov.h"
 #include "qemu/cutils.h"
+#include "qemu/hbitmap.h"
 #include "qapi/error.h"
 #include "hw/qdev-properties.h"
 #include "hw/qdev-core.h"
@@ -20,6 +21,7 @@
 #include "sysemu/sysemu.h"
 #include "hw/pci/pci.h"
 #include "nvme.h"
+#include "nvme-ns.h"
 #include "nvme-subsys.h"
 
 int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 06bccf1b9e9e..2b2c07b36c2b 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -26,7 +26,7 @@
  *  ,subsys= \
  *  -device nvme-ns,drive=,bus=,nsid=,\
  *  zoned=, \
- *  subsys=
+ *  subsys=,detached=
  *
  * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
@@ -77,6 +77,13 @@
  *   controllers in the subsystem. Otherwise, `bus` must be given to attach
  *   this namespace to a specified single controller as a non-shared namespace.
  *
+ * - `detached`
+ *   Not to attach the namespace device to controllers in the NVMe subsystem
+ *   during boot-up.  If not given, namespaces are all attached to all
+ *   controllers in the subsystem by default.
+ *   It's mutual exclusive with 'bus' paraemter.  It's only valid in case
+ *   'subsys' is provided.
+ *
  * Setting `zoned` 

[RFC PATCH V3 1/8] hw/block/nvme: introduce nvme-subsys device

2021-01-19 Thread Minwoo Im
To support multi-path in QEMU NVMe device model, We need to have NVMe
subsystem hierarchy to map controllers and namespaces to a NVMe
subsystem.

This patch introduced a simple nvme-subsys device model.  The subsystem
will be prepared with subsystem NQN with  provided in
nvme-subsys device:

  ex) -device nvme-subsys,id=subsys0: nqn.2019-08.org.qemu:subsys0

Signed-off-by: Minwoo Im 
---
 hw/block/meson.build   |  2 +-
 hw/block/nvme-subsys.c | 63 ++
 hw/block/nvme-subsys.h | 25 +
 hw/block/nvme.c|  3 ++
 4 files changed, 92 insertions(+), 1 deletion(-)
 create mode 100644 hw/block/nvme-subsys.c
 create mode 100644 hw/block/nvme-subsys.h

diff --git a/hw/block/meson.build b/hw/block/meson.build
index 602ca6c8541d..83ea2d37978d 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -13,7 +13,7 @@ softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: 
files('m25p80.c'))
 softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c'))
 softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
 softmmu_ss.add(when: 'CONFIG_SH4', if_true: files('tc58128.c'))
-softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c'))
+softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 
'nvme-subsys.c'))
 
 specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
 specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: 
files('vhost-user-blk.c'))
diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c
new file mode 100644
index ..f1dc71d588d9
--- /dev/null
+++ b/hw/block/nvme-subsys.c
@@ -0,0 +1,63 @@
+/*
+ * QEMU NVM Express Subsystem: nvme-subsys
+ *
+ * Copyright (c) 2021 Minwoo Im 
+ *
+ * This code is licensed under the GNU GPL v2.  Refer COPYING.
+ */
+
+#include "qemu/units.h"
+#include "qemu/osdep.h"
+#include "qemu/uuid.h"
+#include "qemu/iov.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-core.h"
+#include "hw/block/block.h"
+#include "block/aio.h"
+#include "block/accounting.h"
+#include "sysemu/sysemu.h"
+#include "hw/pci/pci.h"
+#include "nvme.h"
+#include "nvme-subsys.h"
+
+static void nvme_subsys_setup(NvmeSubsystem *subsys)
+{
+char *subnqn;
+
+subnqn = g_strdup_printf("nqn.2019-08.org.qemu:%s", subsys->parent_obj.id);
+strpadcpy((char *)subsys->subnqn, sizeof(subsys->subnqn), subnqn, '\0');
+g_free(subnqn);
+}
+
+static void nvme_subsys_realize(DeviceState *dev, Error **errp)
+{
+NvmeSubsystem *subsys = NVME_SUBSYS(dev);
+
+nvme_subsys_setup(subsys);
+}
+
+static void nvme_subsys_class_init(ObjectClass *oc, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(oc);
+
+set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+
+dc->realize = nvme_subsys_realize;
+dc->desc = "Virtual NVMe subsystem";
+}
+
+static const TypeInfo nvme_subsys_info = {
+.name = TYPE_NVME_SUBSYS,
+.parent = TYPE_DEVICE,
+.class_init = nvme_subsys_class_init,
+.instance_size = sizeof(NvmeSubsystem),
+};
+
+static void nvme_subsys_register_types(void)
+{
+type_register_static(_subsys_info);
+}
+
+type_init(nvme_subsys_register_types)
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
new file mode 100644
index ..40f06a4c7db0
--- /dev/null
+++ b/hw/block/nvme-subsys.h
@@ -0,0 +1,25 @@
+/*
+ * QEMU NVM Express Subsystem: nvme-subsys
+ *
+ * Copyright (c) 2021 Minwoo Im 
+ *
+ * This code is licensed under the GNU GPL v2.  Refer COPYING.
+ */
+
+#ifndef NVME_SUBSYS_H
+#define NVME_SUBSYS_H
+
+#define TYPE_NVME_SUBSYS "nvme-subsys"
+#define NVME_SUBSYS(obj) \
+OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
+
+#define NVME_SUBSYS_MAX_CTRLS   32
+
+typedef struct NvmeCtrl NvmeCtrl;
+typedef struct NvmeNamespace NvmeNamespace;
+typedef struct NvmeSubsystem {
+DeviceState parent_obj;
+uint8_t subnqn[256];
+} NvmeSubsystem;
+
+#endif /* NVME_SUBSYS_H */
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 309c26db8ff7..4644937a5c50 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -25,6 +25,7 @@
  *  mdts=,zoned.append_size_limit= \
  *  -device nvme-ns,drive=,bus=,nsid=,\
  *  zoned=
+ *  -device nvme-subsys,id=
  *
  * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
  * offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
@@ -37,6 +38,8 @@
  * -object memory-backend-file,id=,share=on,mem-path=, \
  *  size=  -device nvme,...,pmrdev=
  *
+ * To place controller(s) and namespace(s) to a subsystem, then provide
+ * nvme-subsys device as above.
  *
  * nvme device parameters
  * ~~
-- 
2.17.1




[RFC PATCH V3 4/8] hw/block/nvme: support for multi-controller in subsystem

2021-01-19 Thread Minwoo Im
We have nvme-subsys and nvme devices mapped together.  To support
multi-controller scheme to this setup, controller identifier(id) has to
be managed.  Earlier, cntlid(controller id) used to be always 0 because
we didn't have any subsystem scheme in which the controller id matters.

This patch introduced 'cntlid' attribute to the nvme controller
instance(NvmeCtrl) and make it allocated by the nvme-subsys device
mapped to the controller.  If nvme-subsys is not given to the
controller, then it will always be 0 as it was.

Added 'ctrls' array in the nvme-subsys instance to manage attached
controllers to the subsystem with a limit(32).  This patch didn't take
list for the controllers to make it seamless with nvme-ns device.

Signed-off-by: Minwoo Im 
---
 hw/block/nvme-subsys.c | 21 +
 hw/block/nvme-subsys.h |  4 
 hw/block/nvme.c| 34 --
 hw/block/nvme.h|  1 +
 4 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c
index f1dc71d588d9..a01003136b12 100644
--- a/hw/block/nvme-subsys.c
+++ b/hw/block/nvme-subsys.c
@@ -22,6 +22,27 @@
 #include "nvme.h"
 #include "nvme-subsys.h"
 
+int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
+{
+NvmeSubsystem *subsys = n->subsys;
+int cntlid;
+
+for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) {
+if (!subsys->ctrls[cntlid]) {
+break;
+}
+}
+
+if (cntlid == ARRAY_SIZE(subsys->ctrls)) {
+error_setg(errp, "no more free controller id");
+return -1;
+}
+
+subsys->ctrls[cntlid] = n;
+
+return cntlid;
+}
+
 static void nvme_subsys_setup(NvmeSubsystem *subsys)
 {
 char *subnqn;
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
index 40f06a4c7db0..4eba50d96a1d 100644
--- a/hw/block/nvme-subsys.h
+++ b/hw/block/nvme-subsys.h
@@ -20,6 +20,10 @@ typedef struct NvmeNamespace NvmeNamespace;
 typedef struct NvmeSubsystem {
 DeviceState parent_obj;
 uint8_t subnqn[256];
+
+NvmeCtrl*ctrls[NVME_SUBSYS_MAX_CTRLS];
 } NvmeSubsystem;
 
+int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
+
 #endif /* NVME_SUBSYS_H */
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 3e3b5451ea3d..9f8a739fcd8f 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -4304,16 +4304,21 @@ static void nvme_init_subnqn(NvmeCtrl *n)
 }
 }
 
-static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
+static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t cntlid)
 {
 NvmeIdCtrl *id = >id_ctrl;
 uint8_t *pci_conf = pci_dev->config;
 
+n->cntlid = cntlid;
+
 id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
 id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
 strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
 strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
 strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
+
+id->cntlid = cntlid;
+
 id->rab = 6;
 id->ieee[0] = 0x00;
 id->ieee[1] = 0x02;
@@ -4359,6 +4364,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->psd[0].enlat = cpu_to_le32(0x10);
 id->psd[0].exlat = cpu_to_le32(0x4);
 
+if (n->subsys) {
+id->cmic |= NVME_CMIC_MULTI_CTRL;
+}
+
 NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
 NVME_CAP_SET_CQR(n->bar.cap, 1);
 NVME_CAP_SET_TO(n->bar.cap, 0xf);
@@ -4371,11 +4380,28 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 n->bar.intmc = n->bar.intms = 0;
 }
 
+static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
+{
+int cntlid;
+
+if (!n->subsys) {
+return 0;
+}
+
+cntlid = nvme_subsys_register_ctrl(n, errp);
+if (cntlid < 0) {
+return -1;
+}
+
+return cntlid;
+}
+
 static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 {
 NvmeCtrl *n = NVME(pci_dev);
 NvmeNamespace *ns;
 Error *local_err = NULL;
+int cntlid;
 
 nvme_check_constraints(n, _err);
 if (local_err) {
@@ -4391,7 +4417,11 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
**errp)
 return;
 }
 
-nvme_init_ctrl(n, pci_dev);
+cntlid = nvme_init_subsys(n, errp);
+if (cntlid < 0) {
+return;
+}
+nvme_init_ctrl(n, pci_dev, cntlid);
 
 /* setup a namespace if the controller drive property was given */
 if (n->namespace.blkconf.blk) {
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 3fa0e0a15539..c158cc873b59 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -133,6 +133,7 @@ typedef struct NvmeCtrl {
 NvmeBus  bus;
 BlockConfconf;
 
+uint16_tcntlid;
 boolqs_created;
 uint32_tpage_size;
 uint16_tpage_bits;
-- 
2.17.1




[RFC PATCH V3 6/8] hw/block/nvme: support for shared namespace in subsystem

2021-01-19 Thread Minwoo Im
nvme-ns device is registered to a nvme controller device during the
initialization in nvme_register_namespace() in case that 'bus' property
is given which means it's mapped to a single controller.

This patch introduced a new property 'subsys' just like the controller
device instance did to map a namespace to a NVMe subsystem.

If 'subsys' property is given to the nvme-ns device, it will belong to
the specified subsystem and will be attached to all controllers in that
subsystem by enabling shared namespace capability in NMIC(Namespace
Multi-path I/O and Namespace Capabilities) in Identify Namespace.

Usage:

  -device nvme-subsys,id=subsys0
  -device nvme,serial=foo,id=nvme0,subsys=subsys0
  -device nvme,serial=bar,id=nvme1,subsys=subsys0
  -device nvme,serial=baz,id=nvme2,subsys=subsys0
  -device nvme-ns,id=ns1,drive=,nsid=1,subsys=subsys0  # Shared
  -device nvme-ns,id=ns2,drive=,nsid=2,bus=nvme2   # Non-shared

  In the above example, 'ns1' will be shared to 'nvme0' and 'nvme1' in
  the same subsystem.  On the other hand, 'ns2' will be attached to the
  'nvme2' only as a private namespace in that subsystem.

All the namespace with 'subsys' parameter will attach all controllers in
the subsystem to the namespace by default.

Signed-off-by: Minwoo Im 
---
 hw/block/nvme-ns.c | 23 ++-
 hw/block/nvme-ns.h |  7 +++
 hw/block/nvme-subsys.c | 25 +
 hw/block/nvme-subsys.h |  3 +++
 hw/block/nvme.c| 10 +-
 5 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index c8b75fa02138..073f65e49cac 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -63,6 +63,10 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
 
 id_ns->npda = id_ns->npdg = npdg - 1;
 
+if (nvme_ns_shared(ns)) {
+id_ns->nmic |= NVME_NMIC_NS_SHARED;
+}
+
 return 0;
 }
 
@@ -358,16 +362,25 @@ static void nvme_ns_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
-if (nvme_register_namespace(n, ns, errp)) {
-error_propagate_prepend(errp, local_err,
-"could not register namespace: ");
-return;
+if (ns->subsys) {
+if (nvme_subsys_register_ns(ns, errp)) {
+error_propagate_prepend(errp, local_err,
+"could not setup namespace to subsys: ");
+return;
+}
+} else {
+if (nvme_register_namespace(n, ns, errp)) {
+error_propagate_prepend(errp, local_err,
+"could not register namespace: ");
+return;
+}
 }
-
 }
 
 static Property nvme_ns_props[] = {
 DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
+DEFINE_PROP_LINK("subsys", NvmeNamespace, subsys, TYPE_NVME_SUBSYS,
+ NvmeSubsystem *),
 DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
 DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
 DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false),
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 293ac990e3f6..929e78861903 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -47,6 +47,8 @@ typedef struct NvmeNamespace {
 const uint32_t *iocs;
 uint8_t  csi;
 
+NvmeSubsystem   *subsys;
+
 NvmeIdNsZoned   *id_ns_zoned;
 NvmeZone*zone_array;
 QTAILQ_HEAD(, NvmeZone) exp_open_zones;
@@ -77,6 +79,11 @@ static inline uint32_t nvme_nsid(NvmeNamespace *ns)
 return -1;
 }
 
+static inline bool nvme_ns_shared(NvmeNamespace *ns)
+{
+return !!ns->subsys;
+}
+
 static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns)
 {
 NvmeIdNs *id_ns = >id_ns;
diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c
index a01003136b12..e7efdcae7d0d 100644
--- a/hw/block/nvme-subsys.c
+++ b/hw/block/nvme-subsys.c
@@ -43,6 +43,31 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
 return cntlid;
 }
 
+int nvme_subsys_register_ns(NvmeNamespace *ns, Error **errp)
+{
+NvmeSubsystem *subsys = ns->subsys;
+NvmeCtrl *n;
+int i;
+
+if (subsys->namespaces[nvme_nsid(ns)]) {
+error_setg(errp, "namespace %d already registerd to subsy %s",
+   nvme_nsid(ns), subsys->parent_obj.id);
+return -1;
+}
+
+subsys->namespaces[nvme_nsid(ns)] = ns;
+
+for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
+n = subsys->ctrls[i];
+
+if (n && nvme_register_namespace(n, ns, errp)) {
+return -1;
+}
+}
+
+return 0;
+}
+
 static void nvme_subsys_setup(NvmeSubsystem *subsys)
 {
 char *subnqn;
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
index 4eba50d96a1d..ccf6a71398d3 100644
--- a/hw/block/nvme-subsys.h
+++ b/hw/block/nvme-subsys.h
@@ -14,6 +14,7 @@
 OBJ

[RFC PATCH V3 8/8] hw/block/nvme: Add Identify Active Namespace ID List

2021-01-19 Thread Minwoo Im
Spec v1.4b 6.1.4 "Active and Inactive NSID Types" says:

"Active NSIDs for a controller refer to namespaces that are attached to
that controller. Allocated NSIDs that are inactive for a controller refer
to namespaces that are not attached to that controller."

This patch introduced for Identify Active Namespace ID List (CNS 02h).

Signed-off-by: Minwoo Im 
---
 hw/block/nvme.c | 39 ++-
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 2b2c07b36c2b..7247167b0ee6 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -2883,6 +2883,39 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, 
NvmeRequest *req)
 return NVME_INVALID_FIELD | NVME_DNR;
 }
 
+static uint16_t nvme_identify_nslist_active(NvmeCtrl *n, NvmeRequest *req)
+{
+NvmeNamespace *ns;
+NvmeIdentify *c = (NvmeIdentify *)>cmd;
+uint32_t min_nsid = le32_to_cpu(c->nsid);
+uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
+static const int data_len = sizeof(list);
+uint32_t *list_ptr = (uint32_t *)list;
+int i, j = 0;
+
+if (min_nsid >= NVME_NSID_BROADCAST - 1) {
+return NVME_INVALID_NSID | NVME_DNR;
+}
+
+for (i = 1; i <= n->num_namespaces; i++) {
+ns = nvme_ns(n, i);
+if (!ns || ns->params.nsid <= min_nsid) {
+continue;
+}
+
+if (!nvme_ns_is_attached(n, ns)) {
+continue;
+}
+
+list_ptr[j++] = cpu_to_le32(ns->params.nsid);
+if (j == data_len / sizeof(uint32_t)) {
+break;
+}
+}
+
+return nvme_dma(n, list, data_len, DMA_DIRECTION_FROM_DEVICE, req);
+}
+
 static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req)
 {
 NvmeNamespace *ns;
@@ -2914,10 +2947,6 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, 
NvmeRequest *req)
 continue;
 }
 
-if (!nvme_ns_is_attached(n, ns)) {
-continue;
-}
-
 list_ptr[j++] = cpu_to_le32(ns->params.nsid);
 if (j == data_len / sizeof(uint32_t)) {
 break;
@@ -3045,7 +3074,7 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest 
*req)
 case NVME_ID_CNS_CS_CTRL:
 return nvme_identify_ctrl_csi(n, req);
 case NVME_ID_CNS_NS_ACTIVE_LIST:
- /* fall through */
+ return nvme_identify_nslist_active(n, req);
 case NVME_ID_CNS_NS_PRESENT_LIST:
 return nvme_identify_nslist(n, req);
 case NVME_ID_CNS_CS_NS_ACTIVE_LIST:
-- 
2.17.1




[RFC PATCH V3 2/8] hw/block/nvme: support to map controller to a subsystem

2021-01-19 Thread Minwoo Im
nvme controller(nvme) can be mapped to a NVMe subsystem(nvme-subsys).
This patch maps a controller to a subsystem by adding a parameter
'subsys' to the nvme device.

To map a controller to a subsystem, we need to put nvme-subsys first and
then maps the subsystem to the controller:

  -device nvme-subsys,id=subsys0
  -device nvme,serial=foo,id=nvme0,subsys=subsys0

If 'subsys' property is not given to the nvme controller, then subsystem
NQN will be created with serial (e.g., 'foo' in above example),
Otherwise, it will be based on subsys id (e.g., 'subsys0' in above
example).

Signed-off-by: Minwoo Im 
---
 hw/block/nvme.c | 30 ++
 hw/block/nvme.h |  3 +++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 4644937a5c50..3e3b5451ea3d 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -23,6 +23,7 @@
  *  max_ioqpairs=, \
  *  aerl=, aer_max_queued=, \
  *  mdts=,zoned.append_size_limit= \
+ *  ,subsys= \
  *  -device nvme-ns,drive=,bus=,nsid=,\
  *  zoned=
  *  -device nvme-subsys,id=
@@ -43,6 +44,13 @@
  *
  * nvme device parameters
  * ~~
+ * - `subsys`
+ *   NVM Subsystem device. If given, a subsystem NQN will be initialized with
+ *   <subsys id> given. Otherwise, <serial> will be taken for subsystem NQN.
+ *   Also, it will enable multi controller capability represented in Identify
+ *   Controller data structure in CMIC (Controller Multi-path I/O and Namespace
+ *   Sharing Capabilities), if given.
+ *
  * - `aerl`
  *   The Asynchronous Event Request Limit (AERL). Indicates the maximum number
  *   of concurrently outstanding Asynchronous Event Request commands support
@@ -4281,11 +4289,25 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 return 0;
 }
 
+static void nvme_init_subnqn(NvmeCtrl *n)
+{
+NvmeSubsystem *subsys = n->subsys;
+NvmeIdCtrl *id = &n->id_ctrl;
+char *subnqn;
+
+if (!subsys) {
+subnqn = g_strdup_printf("nqn.2019-08.org.qemu:%s", n->params.serial);
+strpadcpy((char *)id->subnqn, sizeof(id->subnqn), subnqn, '\0');
+g_free(subnqn);
+} else {
+pstrcpy((char *)id->subnqn, sizeof(id->subnqn), (char*)subsys->subnqn);
+}
+}
+
 static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
 {
NvmeIdCtrl *id = &n->id_ctrl;
 uint8_t *pci_conf = pci_dev->config;
-char *subnqn;
 
 id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
 id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
@@ -4331,9 +4353,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
NVME_CTRL_SGLS_BITBUCKET);
 
-subnqn = g_strdup_printf("nqn.2019-08.org.qemu:%s", n->params.serial);
-strpadcpy((char *)id->subnqn, sizeof(id->subnqn), subnqn, '\0');
-g_free(subnqn);
+nvme_init_subnqn(n);
 
 id->psd[0].mp = cpu_to_le16(0x9c4);
 id->psd[0].enlat = cpu_to_le32(0x10);
@@ -4417,6 +4437,8 @@ static void nvme_exit(PCIDevice *pci_dev)
 
 static Property nvme_props[] = {
 DEFINE_BLOCK_PROPERTIES(NvmeCtrl, namespace.blkconf),
+DEFINE_PROP_LINK("subsys", NvmeCtrl, subsys, TYPE_NVME_SUBSYS,
+ NvmeSubsystem *),
 DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmrdev, TYPE_MEMORY_BACKEND,
  HostMemoryBackend *),
 DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial),
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 347c149e7905..3fa0e0a15539 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -2,6 +2,7 @@
 #define HW_NVME_H
 
 #include "block/nvme.h"
+#include "nvme-subsys.h"
 #include "nvme-ns.h"
 
 #define NVME_MAX_NAMESPACES 256
@@ -158,6 +159,8 @@ typedef struct NvmeCtrl {
 
 uint8_t zasl;
 
+NvmeSubsystem   *subsys;
+
 NvmeNamespace   namespace;
 NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES];
 NvmeSQueue  **sq;
-- 
2.17.1




[RFC PATCH V3 5/8] hw/block/nvme: add NMIC enum value for Identify Namespace

2021-01-19 Thread Minwoo Im
Added Namespace Multi-path I/O and Namespace Sharing Capabilities (NMIC)
field to support shared namespace from controller(s).

This field is in Identify Namespace data structure in [30].

Signed-off-by: Minwoo Im 
---
 include/block/nvme.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index 733fb35fedde..28404a62b9d2 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1110,6 +1110,10 @@ enum NvmeNsIdentifierType {
 NVME_NIDT_CSI   = 0x04,
 };
 
+enum NvmeIdNsNmic {
+NVME_NMIC_NS_SHARED = 1 << 0,
+};
+
 enum NvmeCsi {
 NVME_CSI_NVM= 0x00,
 NVME_CSI_ZONED  = 0x02,
-- 
2.17.1




Re: [PATCH v2 05/12] hw/block/nvme: allow cmb and pmr to coexist

2021-01-18 Thread Minwoo Im
Reviewed-by: Minwoo Im 



Re: [PATCH v2 04/12] hw/block/nvme: move msix table and pba to BAR 0

2021-01-18 Thread Minwoo Im


On 21-01-18 10:46:57, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> In the interest of supporting both CMB and PMR to be enabled on the same
> device, move the MSI-X table and pending bit array out of BAR 4 and into
> BAR 0.

Nice!

Reviewed-by: Minwoo Im 
Tested-by: Minwoo Im 

Thanks,



Re: [PATCH v2 10/12] hw/block/nvme: move cmb logic to v1.4

2021-01-18 Thread Minwoo Im
> > Yes, CMB in v1.4 is not backward-compatible, but is it okay to move onto
> > the CMB v1.4 from v1.3 without supporting the v1.3 usage on this device
> > model?
> 
> Next patch moves to v1.4. I wanted to split it because the "bump" patch
> also adds a couple of other v1.4 requires fields. But I understand that
> this is slightly wrong.

Sorry, I meant I'd like to have CMB for v1.3 support along with the v1.4
support in this device model separately.  Maybe I missed the linux-nvme
mailing list for CMB v1.4, but is there no plan to keep supporting CMB
v1.3 in NVMe driver?



Re: [PATCH v2 02/12] hw/block/nvme: fix 64 bit register hi/lo split writes

2021-01-18 Thread Minwoo Im
On 21-01-18 10:46:55, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> 64 bit registers like ASQ and ACQ should be writable by both a hi/lo 32
> bit write combination as well as a plain 64 bit write. The spec does not
> define ordering on the hi/lo split, but the code currently assumes that
> the low order bits are written first. Additionally, the code does not
> consider that another address might already have been written into the
> register, causing the OR'ing to result in a bad address.
> 
> Fix this by explicitly overwriting only the low or high order bits for
> 32 bit writes.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  hw/block/nvme.c | 10 ++
>  1 file changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index bd7e258c3c26..40b9f8ccfc0e 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -3781,19 +3781,21 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr 
> offset, uint64_t data,
>  trace_pci_nvme_mmio_aqattr(data & 0x);
>  break;
>  case 0x28:  /* ASQ */
> -n->bar.asq = data;
> +n->bar.asq = size == 8 ? data :
> +(n->bar.asq & ~0x) | (data & 0x);
 ^^^
If this one is to unmask lower 32bits other than higher 32bits, then
it should be:

(n->bar.asq & ~0xULL)

Also, maybe we should take care of lower than 4bytes as:

.min_access_size = 2,
.max_access_size = 8,

Even we have some error messages up there with:

if (unlikely(size < sizeof(uint32_t))) {
NVME_GUEST_ERR(pci_nvme_ub_mmiowr_toosmall,
   "MMIO write smaller than 32-bits,"
   " offset=0x%"PRIx64", size=%u",
   offset, size);
/* should be ignored, fall through for now */
}

It's a fall-thru error, and that's it.  I would prefer not to have this
error and support for 2bytes access also, OR do not support for 2bytes
access for this MMIO area.

Thanks!



Re: [PATCH v2 10/12] hw/block/nvme: move cmb logic to v1.4

2021-01-18 Thread Minwoo Im
Hello,

On 21-01-18 10:47:03, Klaus Jensen wrote:
> From: Padmakar Kalghatgi 
> 
> Implement v1.4 logic for configuring the Controller Memory Buffer. This
> is not backward compatible with v1.3, so drivers that only support v1.3
> will not be able to use the CMB anymore.
> 
> Signed-off-by: Padmakar Kalghatgi 
> Signed-off-by: Klaus Jensen 

Yes, CMB in v1.4 is not backward-compatible, but is it okay to move onto
the CMB v1.4 from v1.3 without supporting the v1.3 usage on this device
model?



Re: [PATCH v2 02/12] hw/block/nvme: fix 64 bit register hi/lo split writes

2021-01-18 Thread Minwoo Im
> The spec requires aligned 32 bit accesses (or the size of the register),
> so maybe it's about time we actually ignore instead of falling through.

Agreed.



Re: [PATCH v2 10/12] hw/block/nvme: move cmb logic to v1.4

2021-01-18 Thread Minwoo Im
On 21-01-18 14:10:50, Klaus Jensen wrote:
> On Jan 18 22:09, Minwoo Im wrote:
> > > > Yes, CMB in v1.4 is not backward-compatible, but is it okay to move onto
> > > > the CMB v1.4 from v1.3 without supporting the v1.3 usage on this device
> > > > model?
> > > 
> > > Next patch moves to v1.4. I wanted to split it because the "bump" patch
> > > also adds a couple of other v1.4 requires fields. But I understand that
> > > this is slightly wrong.
> > 
> > Sorry, I meant I'd like to have CMB for v1.3 support along with the v1.4
> > support in this device model separately.  Maybe I missed the linux-nvme
> > mailing list for CMB v1.4, but is there no plan to keep supporting CMB
> > v1.3 in NVMe driver?
> 
> I posted a patch on linux-nvme for v1.4 support in the kernel. It will
> support both the v1.3 and v1.4 behavior :)

Then, can we maintain CMB v1.3 also in QEMU also along with v1.4 ? :)



Re: [PATCH v2 06/12] hw/block/nvme: rename PMR/CMB shift/mask fields

2021-01-18 Thread Minwoo Im
On 21-01-18 10:46:59, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Use the correct field names.
> 
> Signed-off-by: Klaus Jensen 
> ---
>  include/block/nvme.h | 16 
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 86d7fc2f905c..f3cbe17d0971 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -35,8 +35,8 @@ enum NvmeCapShift {
>  CAP_CSS_SHIFT  = 37,
>  CAP_MPSMIN_SHIFT   = 48,
>  CAP_MPSMAX_SHIFT   = 52,
> -CAP_PMR_SHIFT  = 56,
> -CAP_CMB_SHIFT  = 57,
> +CAP_PMRS_SHIFT = 56,
> +CAP_CMBS_SHIFT = 57,
>  };
>  
>  enum NvmeCapMask {
> @@ -49,8 +49,8 @@ enum NvmeCapMask {
>  CAP_CSS_MASK   = 0xff,
>  CAP_MPSMIN_MASK= 0xf,
>  CAP_MPSMAX_MASK= 0xf,
> -CAP_PMR_MASK   = 0x1,
> -CAP_CMB_MASK   = 0x1,
> +CAP_PMRS_MASK  = 0x1,
> +CAP_CMBS_MASK  = 0x1,
>  };
>  
>  #define NVME_CAP_MQES(cap)  (((cap) >> CAP_MQES_SHIFT)   & CAP_MQES_MASK)
> @@ -81,10 +81,10 @@ enum NvmeCapMask {
> << 
> CAP_MPSMIN_SHIFT)
>  #define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & 
> CAP_MPSMAX_MASK)\
> << 
> CAP_MPSMAX_SHIFT)
> -#define NVME_CAP_SET_PMRS(cap, val)   (cap |= (uint64_t)(val & CAP_PMR_MASK) 
>   \
> -   << CAP_PMR_SHIFT)
> -#define NVME_CAP_SET_CMBS(cap, val)   (cap |= (uint64_t)(val & CAP_CMB_MASK) 
>   \
> -   << CAP_CMB_SHIFT)
> +#define NVME_CAP_SET_PMRS(cap, val)   (cap |= (uint64_t)(val & 
> CAP_PMRS_MASK)  \
> +   << CAP_PMRS_SHIFT)
> +#define NVME_CAP_SET_CMBS(cap, val)   (cap |= (uint64_t)(val & 
> CAP_CMBS_MASK)  \
> +   << CAP_CMBS_SHIFT)

Oh, it would have been better folded into [3/12] patch, though.

Changes are looking good to me to represent "Supported".

Reviewed-by: Minwoo Im 

>  
>  enum NvmeCapCss {
>  NVME_CAP_CSS_NVM= 1 << 0,
> -- 
> 2.30.0
> 
> 



[RFC PATCH V3 3/8] hw/block/nvme: add CMIC enum value for Identify Controller

2021-01-19 Thread Minwoo Im
Added Controller Multi-path I/O and Namespace Sharing Capabilities
(CMIC) field to support multi-controller in the following patches.

This field is in Identify Controller data structure in [76].

Signed-off-by: Minwoo Im 
---
 include/block/nvme.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index 45b2678db1f0..733fb35fedde 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -941,6 +941,10 @@ enum NvmeIdCtrlLpa {
 NVME_LPA_EXTENDED = 1 << 2,
 };
 
+enum NvmeIdCtrlCmic {
+NVME_CMIC_MULTI_CTRL= 1 << 1,
+};
+
 #define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf)
 #define NVME_CTRL_SQES_MAX(sqes) (((sqes) >> 4) & 0xf)
 #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf)
-- 
2.17.1




Re: [RFC PATCH V3 0/8] hw/block/nvme: support multi-path for ctrl/ns

2021-01-19 Thread Minwoo Im
On 21-01-19 19:18:16, Klaus Jensen wrote:
> On Jan 20 02:01, Minwoo Im wrote:
> > Hello,
> > 
> > This patch series is third one to support multi-controller and namespace
> > sharing in multi-path.  This series introduced subsystem scheme to
> > manage controller(s) and namespace(s) in the subsystem.
> > 
> > This series has new patches from the V2:  'detached' parameter has been
> > added to the nvme-ns device.  This will decide whether to attach the
> > namespace to controller(s) in the current subsystem or not.  If it's
> > given with true, then it will be just allocated in the subsystem, but
> > not attaching to any controllers in the subsystem.  Otherwise, it will
> > automatically attach to all the controllers in the subsystem.  The other
> > thing is that the last patch implemented Identify Active Namespace ID
> > List command handler apart from the Allocated Namespace ID List.
> > 
> > Run with:
> >   -device nvme,serial=qux,id=nvme3
> >   -device nvme-ns,id=ns3,drive=drv12,nsid=3,bus=nvme3
> > 
> >   -device nvme-subsys,id=subsys0
> >   -device nvme,serial=foo,id=nvme0,subsys=subsys0
> >   -device nvme,serial=bar,id=nvme1,subsys=subsys0
> >   -device nvme,serial=baz,id=nvme2,subsys=subsys0
> >   -device nvme-ns,id=ns1,drive=drv10,nsid=1,subsys=subsys0,detached=true
> >   -device nvme-ns,id=ns2,drive=drv11,nsid=2,bus=nvme2
> > 
> > nvme-cli:
> >   root@vm:~/work# nvme list -v  
> > 
> >   NVM Express Subsystems
> >  
> > 
> >  
> >   SubsystemSubsystem-NQN
> > Controllers
> >    
> > 
> >  
> >   nvme-subsys0 nqn.2019-08.org.qemu:qux 
> > nvme0
> >   nvme-subsys1 nqn.2019-08.org.qemu:subsys0 
> > nvme1, nvme2, nvme3
> > 
> >
> >   NVM Express Controllers   
> > 
> > 
> >   
> >   Device   SN   MN   FR 
> >   TxPort AddressSubsystemNamespaces
> >      
> >  -- --  
> >   nvme0qux  QEMU NVMe Ctrl   
> > 1.0  pcie   :00:06.0   nvme-subsys0
> 
> Shouldn't nvme0n1 be listed under Namespaces for nvme0?

Oh, I missed that one from the output.  As Keith mentioned, I ran the
list command again based on the latest nvme-cli.git:

Please refer the following result.  I think it's okay not to send the
cover letter again :)

# nvme --version
nvme version 1.13.48.g33c6

# nvme list -v
NVM Express Subsystems

SubsystemSubsystem-NQN  
  Controllers
 

 
nvme-subsys0 nqn.2019-08.org.qemu:qux   
  nvme0
nvme-subsys1 nqn.2019-08.org.qemu:subsys0   
  nvme1, nvme2, nvme3

NVM Express Controllers

Device   SN   MN   FR   
TxPort AddressSubsystemNamespaces  
    
-- --  
nvme0qux  QEMU NVMe Ctrl   1.0  
pcie   :00:06.0   nvme-subsys0 nvme0n1
nvme1foo  QEMU NVMe Ctrl   1.0  
pcie   :00:07.0   nvme-subsys1 
nvme2bar

Re: [RFC PATCH V3 7/8] hw/block/nvme: add 'detached' param not to attach namespace

2021-01-19 Thread Minwoo Im
> Isn't the HBitmap slightly overkill? Can qemu/bitmap.h suffice?

Definitely, yes, I think.  Current patch series supoprt up to 32
controllers so I think qemu/bitmap.h is enough for us.

Will update the bitmap operations in the next series.



Re: [RFC PATCH V3 0/8] hw/block/nvme: support multi-path for ctrl/ns

2021-01-19 Thread Minwoo Im
> Minwoo, try pulling the most current nvme-cli. There was a sysfs
> scanning bug for non-mpath drives that should be fixed now.

Thank you, Keith!  I've posted list result based on the latest one :)



Re: [PATCH v2 03/12] hw/block/nvme: indicate CMB support through controller capabilities register

2021-01-18 Thread Minwoo Im
Reviewed-by: Minwoo Im 



Re: [PATCH v2 02/12] hw/block/nvme: fix 64 bit register hi/lo split writes

2021-01-18 Thread Minwoo Im
On 21-01-18 20:53:24, Klaus Jensen wrote:
> On Jan 18 21:59, Minwoo Im wrote:
> > > The spec requires aligned 32 bit accesses (or the size of the register),
> > > so maybe it's about time we actually ignore instead of falling through.
> > 
> > Agreed.
> > 
> 
> On the other hand.
> 
> The spec just allows undefined behavior if the alignment or size
> requirement is violated. So falling through is not wrong.

If so, maybe we just can make this MMIO region support under 4bytes
access with error messaging.  I don't think we should not support them
on purpose because spec says it just results in undefined behaviors
which is just undefined how to handle them.



Re: [PATCH v2 10/12] hw/block/nvme: move cmb logic to v1.4

2021-01-18 Thread Minwoo Im
On 21-01-18 20:23:30, Klaus Jensen wrote:
> On Jan 18 14:22, Klaus Jensen wrote:
> > On Jan 18 22:12, Minwoo Im wrote:
> > > On 21-01-18 14:10:50, Klaus Jensen wrote:
> > > > On Jan 18 22:09, Minwoo Im wrote:
> > > > > > > Yes, CMB in v1.4 is not backward-compatible, but is it okay to 
> > > > > > > move onto
> > > > > > > the CMB v1.4 from v1.3 without supporting the v1.3 usage on this 
> > > > > > > device
> > > > > > > model?
> > > > > > 
> > > > > > Next patch moves to v1.4. I wanted to split it because the "bump" 
> > > > > > patch
> > > > > > also adds a couple of other v1.4 requires fields. But I understand 
> > > > > > that
> > > > > > this is slightly wrong.
> > > > > 
> > > > > Sorry, I meant I'd like to have CMB for v1.3 support along with the 
> > > > > v1.4
> > > > > support in this device model separately.  Maybe I missed the 
> > > > > linux-nvme
> > > > > mailing list for CMB v1.4, but is there no plan to keep supportin CMB
> > > > > v1.3 in NVMe driver?
> > > > 
> > > > I posted a patch on linux-nvme for v1.4 support in the kernel. It will
> > > > support both the v1.3 and v1.4 behavior :)
> > > 
> > > Then, can we maintain CMB v1.3 also in QEMU also along with v1.4 ? :)
> > 
> > My first version of this patch actually included compatibility support
> > for v1.3 ("legacy cmb"), but Keith suggested we just drop that and keep
> > this device compliant.
> > 
> > What we could do is allow the spec version to be chosen with a
> > parameter?
> 
> Uhm. Maybe not. I gave this some more thought.
> 
> Adding a device parameter to choose the specification version requires
> us to maintain QEMU "compat" properties across different QEMU version.
> I'm not sure we want that for something like this.

Agreed.  The default should head for latest one.

> 
> Maybe the best course of action actually *is* an 'x-legacy-cmb'
> parameter.

This looks fine.

As kernel driver does not remove the v1.3 CMB support, then it would be
great if QEMU suports that also.



Re: [RFC PATCH V2 10/11] hw/block/nvme: add NMIC enum value for Identify Namespace

2021-01-18 Thread Minwoo Im
> Let's keep convention (or should be convention...) of using NvmeIdNs
> prefix for identify data structure fields on the enum.

Okay!



Re: [RFC PATCH V2 00/11] hw/block/nvme: support multi-path for ctrl/ns

2021-01-18 Thread Minwoo Im
On 21-01-18 22:14:45, Klaus Jensen wrote:
> On Jan 17 23:53, Minwoo Im wrote:
> > Hello,
> > 
> > This patch series introduces NVMe subsystem device to support multi-path
> > I/O in NVMe device model.  Two use-cases are supported along with this
> > patch: Multi-controller, Namespace Sharing.
> > 
> > V1 RFC has been discussed with Klaus and Keith, I really appreciate them
> > for this patch series to have proper direction [1].
> > 
> > This patch series contains few start-up refactoring pathces from the
> > first to fifth patches to make nvme-ns device not to rely on the nvme
> > controller always.  Because nvme-ns shall be able to be mapped to the
> > subsystem level, not a single controller level so that it should provide
> > generic initialization code: nvme_ns_setup() with NvmeCtrl.  To do that,
> > the first five patches are to remove the NvmeCtrl * instance argument
> > from the nvme_ns_setup().  I'd be happy if they are picked!
> > 
> > For controller and namespace devices, 'subsys' property has been
> > introduced to map them to a subsystem.  If multi-controller needed, we
> > can specify 'subsys' to controllers the same.
> > 
> > For namespace deivice, if 'subsys' is not given just like it was, it
> > will have to be provided with 'bus' parameter to specify a nvme
> > controller device to attach, it means, they are mutual-exlusive.  To
> > share a namespace between or among controllers, then nvme-ns should have
> > 'subsys' property to a single nvme subsystem instance.  To make a
> > namespace private one, then we need to specify 'bus' property rather
> > than the 'subsys'.
> > 
> > Of course, this series does not require any updates for the run command
> > for the previos users.
> > 
> > Plase refer the following example with nvme-cli output:
> > 
> > QEMU Run:
> >   -device nvme-subsys,id=subsys0 \
> >   -device nvme,serial=foo,id=nvme0,subsys=subsys0 \
> >   -device nvme,serial=bar,id=nvme1,subsys=subsys0 \
> >   -device nvme,serial=baz,id=nvme2,subsys=subsys0 \
> >   -device nvme-ns,id=ns1,drive=drv10,nsid=1,subsys=subsys0 \
> >   -device nvme-ns,id=ns2,drive=drv11,nsid=2,bus=nvme2 \
> >   \
> >   -device nvme,serial=qux,id=nvme3 \
> >   -device nvme-ns,id=ns3,drive=drv12,nsid=3,bus=nvme3
> > 
> > nvme-cli:
> >   root@vm:~/work# nvme list -v
> >   NVM Express Subsystems
> > 
> >   SubsystemSubsystem-NQN
> > Controllers
> >    
> > 
> >  
> >   nvme-subsys1 nqn.2019-08.org.qemu:subsys0 
> > nvme0, nvme1, nvme2
> >   nvme-subsys3 nqn.2019-08.org.qemu:qux 
> > nvme3
> > 
> >   NVM Express Controllers
> > 
> >   Device   SN   MN   FR 
> >   TxPort AddressSubsystemNamespaces
> >      
> >  -- --  
> >   nvme0foo  QEMU NVMe Ctrl   
> > 1.0  pcie   :00:06.0   nvme-subsys1 nvme1n1
> >   nvme1bar  QEMU NVMe Ctrl   
> > 1.0  pcie   :00:07.0   nvme-subsys1 nvme1n1
> >   nvme2baz  QEMU NVMe Ctrl   
> > 1.0  pcie   :00:08.0   nvme-subsys1 nvme1n1, nvme1n2
> >   nvme3qux  QEMU NVMe Ctrl   
> > 1.0  pcie   :00:09.0   nvme-subsys3
> > 
> >   NVM Express Namespaces
> > 
> >   Device   NSID Usage  Format   
> > Controllers
> >     --  
> > 
> >   nvme1n1  1134.22  MB / 134.22  MB512   B +  0 B   nvme0, 
> > nvme1, nvme2
> >   nvme1n2  2268.44  MB / 268.44  MB512   B +  0 B   nvme2
> >   nvme3n1  3268.44  MB / 268.44  MB512   B +  0 B   nvme3
> > 
> > Summary:
> >   - Refactored nvme-ns device not to rely on controller during the
> > setup.  [1/11 - 5/11]
> >   - Introduced a nvme-subsys device model. [6/11]
> >   - Create subsystem NQN based on subsystem. [7/11]

Re: [RFC PATCH V3 0/8] hw/block/nvme: support multi-path for ctrl/ns

2021-01-20 Thread Minwoo Im
On 21-01-20 08:52:14, Klaus Jensen wrote:
> On Jan 20 09:44, Minwoo Im wrote:
> > On 21-01-19 19:18:16, Klaus Jensen wrote:
> > > On Jan 20 02:01, Minwoo Im wrote:
> > > > Hello,
> > > > 
> > > > This patch series is third one to support multi-controller and namespace
> > > > sharing in multi-path.  This series introduced subsystem scheme to
> > > > manage controller(s) and namespace(s) in the subsystem.
> > > > 
> > > > This series has new patches from the V2:  'detached' parameter has been
> > > > added to the nvme-ns device.  This will decide whether to attach the
> > > > namespace to controller(s) in the current subsystem or not.  If it's
> > > > given with true, then it will be just allocated in the subsystem, but
> > > > not attaching to any controllers in the subsystem.  Otherwise, it will
> > > > automatically attach to all the controllers in the subsystem.  The other
> > > > thing is that the last patch implemented Identify Active Namespace ID
> > > > List command handler apart from the Allocated Namespace ID List.
> > > > 
> > > > Run with:
> > > >   -device nvme,serial=qux,id=nvme3
> > > >   -device nvme-ns,id=ns3,drive=drv12,nsid=3,bus=nvme3
> > > > 
> > > >   -device nvme-subsys,id=subsys0
> > > >   -device nvme,serial=foo,id=nvme0,subsys=subsys0
> > > >   -device nvme,serial=bar,id=nvme1,subsys=subsys0
> > > >   -device nvme,serial=baz,id=nvme2,subsys=subsys0
> > > >   -device nvme-ns,id=ns1,drive=drv10,nsid=1,subsys=subsys0,detached=true
> > > >   -device nvme-ns,id=ns2,drive=drv11,nsid=2,bus=nvme2
> > > > 
> > > > nvme-cli:
> > > >   root@vm:~/work# nvme list -v  
> > > > 
> > > >   NVM Express Subsystems
> > > >  
> > > > 
> > > >  
> > > >   SubsystemSubsystem-NQN
> > > > Controllers
> > > >    
> > > > 
> > > >  
> > > >   nvme-subsys0 nqn.2019-08.org.qemu:qux 
> > > > nvme0
> > > >   nvme-subsys1 nqn.2019-08.org.qemu:subsys0 
> > > > nvme1, nvme2, nvme3
> > > > 
> > > >
> > > >   NVM Express Controllers   
> > > > 
> > > > 
> > > >   
> > > >   Device   SN   MN  
> > > >  FR   TxPort AddressSubsystemNamespaces
> > > >     
> > > >   -- -- 
> > > >  
> > > >   nvme0qux  QEMU NVMe Ctrl  
> > > >  1.0  pcie   :00:06.0   nvme-subsys0
> > > 
> > > Shouldn't nvme0n1 be listed under Namespaces for nvme0?
> > 
> > Oh, I missed that one from the output.  As Keith mentioned, I ran the
> > list command again based on the latest nvme-cli.git:
> > 
> > Please refer the following result.  I think it's okay not to send the
> > cover letter again :)
> > 
> > # nvme --version
> > nvme version 1.13.48.g33c6
> > 
> > # nvme list -v
> > NVM Express Subsystems
> > 
> > SubsystemSubsystem-NQN  
> >   Controllers
> >  
> > 
> >  
> > nvme-subsys0 n

[PATCH V6 5/6] hw/block/nvme: add NMIC enum value for Identify Namespace

2021-01-23 Thread Minwoo Im
Added Namespace Multi-path I/O and Namespace Sharing Capabilities (NMIC)
field to support shared namespace from controller(s).

This field is in Identify Namespace data structure in [30].

Signed-off-by: Minwoo Im 
---
 include/block/nvme.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index d6415a869c1c..ad68cdc2b92d 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1203,6 +1203,10 @@ enum NvmeNsIdentifierType {
 NVME_NIDT_CSI   = 0x04,
 };
 
+enum NvmeIdNsNmic {
+NVME_NMIC_NS_SHARED = 1 << 0,
+};
+
 enum NvmeCsi {
 NVME_CSI_NVM= 0x00,
 NVME_CSI_ZONED  = 0x02,
-- 
2.17.1




[PATCH V6 2/6] hw/block/nvme: support to map controller to a subsystem

2021-01-23 Thread Minwoo Im
nvme controller(nvme) can be mapped to a NVMe subsystem(nvme-subsys).
This patch maps a controller to a subsystem by adding a parameter
'subsys' to the nvme device.

To map a controller to a subsystem, we need to put nvme-subsys first and
then maps the subsystem to the controller:

  -device nvme-subsys,id=subsys0
  -device nvme,serial=foo,id=nvme0,subsys=subsys0

If 'subsys' property is not given to the nvme controller, then subsystem
NQN will be created with serial (e.g., 'foo' in above example),
Otherwise, it will be based on subsys id (e.g., 'subsys0' in above
example).

Signed-off-by: Minwoo Im 
---
 hw/block/nvme.c | 30 +-
 hw/block/nvme.h |  3 +++
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index aabccdf36f4b..b525fca14103 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -22,7 +22,8 @@
  *  [pmrdev=,] \
  *  max_ioqpairs=, \
  *  aerl=, aer_max_queued=, \
- *  mdts=,zoned.append_size_limit= \
+ *  mdts=,zoned.append_size_limit=, \
+ *  subsys= \
  *  -device nvme-ns,drive=,bus=,nsid=,\
  *  zoned=
  *  -device nvme-subsys,id=
@@ -44,6 +45,13 @@
  *
  * nvme device parameters
  * ~~
+ * - `subsys`
+ *   NVM Subsystem device. If given, a subsystem NQN will be initialized with
+ *   <subsys id> given. Otherwise, <serial> will be taken for subsystem NQN.
+ *   Also, it will enable multi controller capability represented in Identify
+ *   Controller data structure in CMIC (Controller Multi-path I/O and Namespace
+ *   Sharing Capabilities), if given.
+ *
  * - `aerl`
  *   The Asynchronous Event Request Limit (AERL). Indicates the maximum number
  *   of concurrently outstanding Asynchronous Event Request commands support
@@ -4404,11 +4412,23 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 return 0;
 }
 
+static void nvme_init_subnqn(NvmeCtrl *n)
+{
+NvmeSubsystem *subsys = n->subsys;
+NvmeIdCtrl *id = &n->id_ctrl;
+
+if (!subsys) {
+snprintf((char *)id->subnqn, sizeof(id->subnqn),
+ "nqn.2019-08.org.qemu:%s", n->params.serial);
+} else {
+pstrcpy((char *)id->subnqn, sizeof(id->subnqn), (char*)subsys->subnqn);
+}
+}
+
 static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
 {
NvmeIdCtrl *id = &n->id_ctrl;
 uint8_t *pci_conf = pci_dev->config;
-char *subnqn;
 
 id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
 id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
@@ -4455,9 +4475,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
NVME_CTRL_SGLS_BITBUCKET);
 
-subnqn = g_strdup_printf("nqn.2019-08.org.qemu:%s", n->params.serial);
-strpadcpy((char *)id->subnqn, sizeof(id->subnqn), subnqn, '\0');
-g_free(subnqn);
+nvme_init_subnqn(n);
 
 id->psd[0].mp = cpu_to_le16(0x9c4);
 id->psd[0].enlat = cpu_to_le32(0x10);
@@ -4545,6 +4563,8 @@ static Property nvme_props[] = {
 DEFINE_BLOCK_PROPERTIES(NvmeCtrl, namespace.blkconf),
 DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmr.dev, TYPE_MEMORY_BACKEND,
  HostMemoryBackend *),
+DEFINE_PROP_LINK("subsys", NvmeCtrl, subsys, TYPE_NVME_SUBSYS,
+ NvmeSubsystem *),
 DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial),
 DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0),
 DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0),
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index dee6092bd45f..04d4684601fd 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -2,6 +2,7 @@
 #define HW_NVME_H
 
 #include "block/nvme.h"
+#include "nvme-subsys.h"
 #include "nvme-ns.h"
 
 #define NVME_MAX_NAMESPACES 256
@@ -170,6 +171,8 @@ typedef struct NvmeCtrl {
 
 uint8_t zasl;
 
+NvmeSubsystem   *subsys;
+
 NvmeNamespace   namespace;
 NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES];
 NvmeSQueue  **sq;
-- 
2.17.1




[PATCH V6 3/6] hw/block/nvme: add CMIC enum value for Identify Controller

2021-01-23 Thread Minwoo Im
Added Controller Multi-path I/O and Namespace Sharing Capabilities
(CMIC) field to support multi-controller in the following patches.

This field is in Identify Controller data structure in [76].

Signed-off-by: Minwoo Im 
---
 include/block/nvme.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index e4b918064df9..d6415a869c1c 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1034,6 +1034,10 @@ enum NvmeIdCtrlLpa {
 NVME_LPA_EXTENDED = 1 << 2,
 };
 
+enum NvmeIdCtrlCmic {
+NVME_CMIC_MULTI_CTRL= 1 << 1,
+};
+
 #define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf)
 #define NVME_CTRL_SQES_MAX(sqes) (((sqes) >> 4) & 0xf)
 #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf)
-- 
2.17.1




[PATCH V6 6/6] hw/block/nvme: support for shared namespace in subsystem

2021-01-23 Thread Minwoo Im
nvme-ns device is registered to a nvme controller device during the
initialization in nvme_register_namespace() in case that 'bus' property
is given which means it's mapped to a single controller.

This patch introduced a new property 'subsys' just like the controller
device instance did to map a namespace to a NVMe subsystem.

If 'subsys' property is given to the nvme-ns device, it will belong to
the specified subsystem and will be attached to all controllers in that
subsystem by enabling shared namespace capability in NMIC(Namespace
Multi-path I/O and Namespace Capabilities) in Identify Namespace.

Usage:

  -device nvme-subsys,id=subsys0
  -device nvme,serial=foo,id=nvme0,subsys=subsys0
  -device nvme,serial=bar,id=nvme1,subsys=subsys0
  -device nvme,serial=baz,id=nvme2,subsys=subsys0
  -device nvme-ns,id=ns1,drive=,nsid=1,subsys=subsys0  # Shared
  -device nvme-ns,id=ns2,drive=,nsid=2,bus=nvme2   # Non-shared

  In the above example, 'ns1' will be shared to 'nvme0' and 'nvme1' in
  the same subsystem.  On the other hand, 'ns2' will be attached to the
  'nvme2' only as a private namespace in that subsystem.

All the namespace with 'subsys' parameter will attach all controllers in
the subsystem to the namespace by default.

Signed-off-by: Minwoo Im <minwoo.im@gmail.com>
---
 hw/block/nvme-ns.c | 23 ++-
 hw/block/nvme-ns.h |  7 +++
 hw/block/nvme-subsys.c | 25 +
 hw/block/nvme-subsys.h |  3 +++
 hw/block/nvme.c| 10 +-
 5 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 62b25cf69bfa..9b493f2ead03 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -63,6 +63,10 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
 
 id_ns->npda = id_ns->npdg = npdg - 1;
 
+if (nvme_ns_shared(ns)) {
+id_ns->nmic |= NVME_NMIC_NS_SHARED;
+}
+
 return 0;
 }
 
@@ -365,16 +369,25 @@ static void nvme_ns_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
-if (nvme_register_namespace(n, ns, errp)) {
-error_propagate_prepend(errp, local_err,
-"could not register namespace: ");
-return;
+if (ns->subsys) {
+if (nvme_subsys_register_ns(ns, errp)) {
+error_propagate_prepend(errp, local_err,
+"could not setup namespace to subsys: ");
+return;
+}
+} else {
+if (nvme_register_namespace(n, ns, errp)) {
+error_propagate_prepend(errp, local_err,
+"could not register namespace: ");
+return;
+}
 }
-
 }
 
 static Property nvme_ns_props[] = {
 DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
+DEFINE_PROP_LINK("subsys", NvmeNamespace, subsys, TYPE_NVME_SUBSYS,
+ NvmeSubsystem *),
 DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
 DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
 DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false),
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 293ac990e3f6..929e78861903 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -47,6 +47,8 @@ typedef struct NvmeNamespace {
 const uint32_t *iocs;
 uint8_t  csi;
 
+NvmeSubsystem   *subsys;
+
 NvmeIdNsZoned   *id_ns_zoned;
 NvmeZone*zone_array;
 QTAILQ_HEAD(, NvmeZone) exp_open_zones;
@@ -77,6 +79,11 @@ static inline uint32_t nvme_nsid(NvmeNamespace *ns)
 return -1;
 }
 
+static inline bool nvme_ns_shared(NvmeNamespace *ns)
+{
+return !!ns->subsys;
+}
+
 static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns)
 {
    NvmeIdNs *id_ns = &ns->id_ns;
diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c
index e9d61c993c90..641de33e99fc 100644
--- a/hw/block/nvme-subsys.c
+++ b/hw/block/nvme-subsys.c
@@ -43,6 +43,31 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
 return cntlid;
 }
 
+int nvme_subsys_register_ns(NvmeNamespace *ns, Error **errp)
+{
+NvmeSubsystem *subsys = ns->subsys;
+NvmeCtrl *n;
+int i;
+
+if (subsys->namespaces[nvme_nsid(ns)]) {
+error_setg(errp, "namespace %d already registered to subsys %s",
+   nvme_nsid(ns), subsys->parent_obj.id);
+return -1;
+}
+
+subsys->namespaces[nvme_nsid(ns)] = ns;
+
+for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
+n = subsys->ctrls[i];
+
+if (n && nvme_register_namespace(n, ns, errp)) {
+return -1;
+}
+}
+
+return 0;
+}
+
 static void nvme_subsys_setup(NvmeSubsystem *subsys)
 {
 snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn),
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
index 4eba50d96a1d..ccf6a71398d3 100644
--- a/hw/block/nvme-subsys.h
+++ 

[PATCH V6 4/6] hw/block/nvme: support for multi-controller in subsystem

2021-01-23 Thread Minwoo Im
We have nvme-subsys and nvme devices mapped together.  To support
multi-controller scheme to this setup, controller identifier(id) has to
be managed.  Earlier, cntlid(controller id) used to be always 0 because
we didn't have any subsystem scheme that controller id matters.

This patch introduced 'cntlid' attribute to the nvme controller
instance(NvmeCtrl) and make it allocated by the nvme-subsys device
mapped to the controller.  If nvme-subsys is not given to the
controller, then it will always be 0 as it was.

Added 'ctrls' array in the nvme-subsys instance to manage attached
controllers to the subsystem with a limit(32).  This patch didn't take
list for the controllers to make it seamless with nvme-ns device.

Signed-off-by: Minwoo Im <minwoo.im@gmail.com>
---
 hw/block/nvme-subsys.c | 21 +
 hw/block/nvme-subsys.h |  4 
 hw/block/nvme.c| 29 +
 hw/block/nvme.h|  1 +
 4 files changed, 55 insertions(+)

diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c
index aa82911b951c..e9d61c993c90 100644
--- a/hw/block/nvme-subsys.c
+++ b/hw/block/nvme-subsys.c
@@ -22,6 +22,27 @@
 #include "nvme.h"
 #include "nvme-subsys.h"
 
+int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
+{
+NvmeSubsystem *subsys = n->subsys;
+int cntlid;
+
+for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) {
+if (!subsys->ctrls[cntlid]) {
+break;
+}
+}
+
+if (cntlid == ARRAY_SIZE(subsys->ctrls)) {
+error_setg(errp, "no more free controller id");
+return -1;
+}
+
+subsys->ctrls[cntlid] = n;
+
+return cntlid;
+}
+
 static void nvme_subsys_setup(NvmeSubsystem *subsys)
 {
 snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn),
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
index 40f06a4c7db0..4eba50d96a1d 100644
--- a/hw/block/nvme-subsys.h
+++ b/hw/block/nvme-subsys.h
@@ -20,6 +20,10 @@ typedef struct NvmeNamespace NvmeNamespace;
 typedef struct NvmeSubsystem {
 DeviceState parent_obj;
 uint8_t subnqn[256];
+
+NvmeCtrl*ctrls[NVME_SUBSYS_MAX_CTRLS];
 } NvmeSubsystem;
 
+int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
+
 #endif /* NVME_SUBSYS_H */
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index b525fca14103..7138389be4bd 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -4435,6 +4435,9 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
 strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
 strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
+
+id->cntlid = cpu_to_le16(n->cntlid);
+
 id->rab = 6;
 id->ieee[0] = 0x00;
 id->ieee[1] = 0x02;
@@ -4481,6 +4484,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 id->psd[0].enlat = cpu_to_le32(0x10);
 id->psd[0].exlat = cpu_to_le32(0x4);
 
+if (n->subsys) {
+id->cmic |= NVME_CMIC_MULTI_CTRL;
+}
+
 NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
 NVME_CAP_SET_CQR(n->bar.cap, 1);
 NVME_CAP_SET_TO(n->bar.cap, 0xf);
@@ -4495,6 +4502,24 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice 
*pci_dev)
 n->bar.intmc = n->bar.intms = 0;
 }
 
+static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
+{
+int cntlid;
+
+if (!n->subsys) {
+return 0;
+}
+
+cntlid = nvme_subsys_register_ctrl(n, errp);
+if (cntlid < 0) {
+return -1;
+}
+
+n->cntlid = cntlid;
+
+return 0;
+}
+
 static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 {
 NvmeCtrl *n = NVME(pci_dev);
@@ -4515,6 +4540,10 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
**errp)
 return;
 }
 
+if (nvme_init_subsys(n, errp)) {
+error_propagate(errp, local_err);
+return;
+}
 nvme_init_ctrl(n, pci_dev);
 
 /* setup a namespace if the controller drive property was given */
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 04d4684601fd..b8f5f2d6ffb8 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -134,6 +134,7 @@ typedef struct NvmeCtrl {
 NvmeBus  bus;
 BlockConfconf;
 
+uint16_tcntlid;
 boolqs_created;
 uint32_tpage_size;
 uint16_tpage_bits;
-- 
2.17.1




[PATCH V6 0/6] hw/block/nvme: support multi-path for ctrl/ns

2021-01-23 Thread Minwoo Im
Hello,

This is sixth patch series for the support of NVMe subsystem scheme with
multi-controller and namespace sharing in a subsystem.

This version has a fix in nvme_init_ctrl() when 'cntlid' is set to the
Identify Controller data structure by making it by cpu_to_le16() as
Keith reviewed.

Here's test result with a simple 'nvme list -v' command from this model:

  -device nvme-subsys,id=subsys0 \
  -device nvme,serial=foo,id=nvme0,subsys=subsys0 \
  -device nvme,serial=bar,id=nvme1,subsys=subsys0 \
  -device nvme,serial=baz,id=nvme2,subsys=subsys0 \
  -device nvme-ns,id=ns1,drive=drv10,nsid=1,subsys=subsys0 \
  -device nvme-ns,id=ns2,drive=drv11,nsid=2,bus=nvme2 \
  \
  -device nvme,serial=qux,id=nvme3 \
  -device nvme-ns,id=ns3,drive=drv12,nsid=3,bus=nvme3 \
  \
  -device nvme-subsys,id=subsys1 \
  -device nvme,serial=quux,id=nvme4,subsys=subsys1 \
  -device nvme-ns,id=ns4,drive=drv13,nsid=1,subsys=subsys1,zoned=true \

  root@vm:~/work# nvme list -v
  NVM Express Subsystems

  SubsystemSubsystem-NQN
Controllers
   

 
  nvme-subsys1 nqn.2019-08.org.qemu:subsys0 
nvme0, nvme1, nvme2
  nvme-subsys3 nqn.2019-08.org.qemu:qux 
nvme3
  nvme-subsys4 nqn.2019-08.org.qemu:subsys1 
nvme4

  NVM Express Controllers

  Device   SN   MN   FR 
  TxPort AddressSubsystemNamespaces
     
 -- --  
  nvme0foo  QEMU NVMe Ctrl   1.0
  pcie   :00:06.0   nvme-subsys1 nvme1c0n1
  nvme1bar  QEMU NVMe Ctrl   1.0
  pcie   :00:07.0   nvme-subsys1 nvme1c1n1
  nvme2baz  QEMU NVMe Ctrl   1.0
  pcie   :00:08.0   nvme-subsys1 nvme1c2n1, nvme1c2n2
  nvme3qux  QEMU NVMe Ctrl   1.0
  pcie   :00:09.0   nvme-subsys3 nvme3n1
  nvme4quux QEMU NVMe Ctrl   1.0
  pcie   :00:0a.0   nvme-subsys4 nvme4c4n1

  NVM Express Namespaces

  Device   NSID Usage  Format   Controllers
    --  

  nvme1n1  1134.22  MB / 134.22  MB512   B +  0 B   nvme0, 
nvme1, nvme2
  nvme1n2  2268.44  MB / 268.44  MB512   B +  0 B   nvme2
  nvme3n1  3268.44  MB / 268.44  MB512   B +  0 B   nvme3
  nvme4n1  1268.44  MB / 268.44  MB512   B +  0 B   nvme4

Thanks,

Since V5:
  - Fix endianness for 'cntlid' in Identify Controller data structure.
(Keith)

Since V4:
  - Code clean-up to snprintf rather than duplicating it and copy.
(Keith)
  - Documentation for 'subsys' clean-up.  (Keith)
  - Remove 'cntlid' param from nvme_init_ctrl().  (Keith)
  - Put error_propagate() in nvme_realize().  (Keith)

Since RFC V3:
  - Exclude 'detached' scheme from this series.  This will be covered in
the next series by covering all the ns-related admin commands
including ZNS and ns-mgmt. (Niklas)
  - Rebased on nvme-next.
  - Remove RFC tag from this V4.

Since RFC V2:
  - Rebased on nvme-next branch with trivial patches from the previous
version(V2) applied. (Klaus)
  - Fix enumeration type name convention with NvmeIdNs prefix. (Klaus)
  - Put 'cntlid' to NvmeCtrl instance in nvme_init_ctrl() which was
missed in V2.
  - Added 'detached' parameter to nvme-ns device to decide whether to
attach or not to controller(s) in the subsystem. (Klaus)
  - Implemented Identify Active Namespace ID List apart from Identify
Allocated Namespace ID List by removing fall-thru statement.

Since RFC V1:
  - Updated namespace sharing scheme to be based on nvme-subsys
hierarchy.

Minwoo Im (6):
  hw/block/nvme: introduce nvme-subsys device
  hw/block/nvme: support to map controller to a subsystem
  hw/block/nvme: add CMIC enum value for Identify Controller
  hw/block/nvme: support for multi-controller in subsystem
  hw/block/nvme: add NMIC enum value for Identify Namespace
  hw/block/nvme: support for shared namespace in subsystem

 hw/block/meson.build   |   2 +-
 hw/block/nvme-ns.c |  23 +++--
 hw/block/nvme-ns.h |   7 +++
 hw/block/nvme-subsys.c | 106 +
 hw/block/nvme-subsys.h |  32 +
 hw/block/nvme.c|  72 +---
 hw/block/nvme.h

[PATCH V6 1/6] hw/block/nvme: introduce nvme-subsys device

2021-01-23 Thread Minwoo Im
To support multi-path in QEMU NVMe device model, We need to have NVMe
subsystem hierarchy to map controllers and namespaces to a NVMe
subsystem.

This patch introduced a simple nvme-subsys device model.  The subsystem
will be prepared with subsystem NQN with <subsys_id> provided in
nvme-subsys device:

  ex) -device nvme-subsys,id=subsys0: nqn.2019-08.org.qemu:subsys0

Signed-off-by: Minwoo Im <minwoo.im@gmail.com>
---
 hw/block/meson.build   |  2 +-
 hw/block/nvme-subsys.c | 60 ++
 hw/block/nvme-subsys.h | 25 ++
 hw/block/nvme.c|  3 +++
 4 files changed, 89 insertions(+), 1 deletion(-)
 create mode 100644 hw/block/nvme-subsys.c
 create mode 100644 hw/block/nvme-subsys.h

diff --git a/hw/block/meson.build b/hw/block/meson.build
index 602ca6c8541d..83ea2d37978d 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -13,7 +13,7 @@ softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: 
files('m25p80.c'))
 softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c'))
 softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
 softmmu_ss.add(when: 'CONFIG_SH4', if_true: files('tc58128.c'))
-softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c'))
+softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 
'nvme-subsys.c'))
 
 specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
 specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: 
files('vhost-user-blk.c'))
diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c
new file mode 100644
index ..aa82911b951c
--- /dev/null
+++ b/hw/block/nvme-subsys.c
@@ -0,0 +1,60 @@
+/*
+ * QEMU NVM Express Subsystem: nvme-subsys
+ *
+ * Copyright (c) 2021 Minwoo Im 
+ *
+ * This code is licensed under the GNU GPL v2.  Refer COPYING.
+ */
+
+#include "qemu/units.h"
+#include "qemu/osdep.h"
+#include "qemu/uuid.h"
+#include "qemu/iov.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-core.h"
+#include "hw/block/block.h"
+#include "block/aio.h"
+#include "block/accounting.h"
+#include "sysemu/sysemu.h"
+#include "hw/pci/pci.h"
+#include "nvme.h"
+#include "nvme-subsys.h"
+
+static void nvme_subsys_setup(NvmeSubsystem *subsys)
+{
+snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn),
+ "nqn.2019-08.org.qemu:%s", subsys->parent_obj.id);
+}
+
+static void nvme_subsys_realize(DeviceState *dev, Error **errp)
+{
+NvmeSubsystem *subsys = NVME_SUBSYS(dev);
+
+nvme_subsys_setup(subsys);
+}
+
+static void nvme_subsys_class_init(ObjectClass *oc, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(oc);
+
+set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+
+dc->realize = nvme_subsys_realize;
+dc->desc = "Virtual NVMe subsystem";
+}
+
+static const TypeInfo nvme_subsys_info = {
+.name = TYPE_NVME_SUBSYS,
+.parent = TYPE_DEVICE,
+.class_init = nvme_subsys_class_init,
+.instance_size = sizeof(NvmeSubsystem),
+};
+
+static void nvme_subsys_register_types(void)
+{
+type_register_static(&nvme_subsys_info);
+}
+
+type_init(nvme_subsys_register_types)
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
new file mode 100644
index ..40f06a4c7db0
--- /dev/null
+++ b/hw/block/nvme-subsys.h
@@ -0,0 +1,25 @@
+/*
+ * QEMU NVM Express Subsystem: nvme-subsys
+ *
+ * Copyright (c) 2021 Minwoo Im 
+ *
+ * This code is licensed under the GNU GPL v2.  Refer COPYING.
+ */
+
+#ifndef NVME_SUBSYS_H
+#define NVME_SUBSYS_H
+
+#define TYPE_NVME_SUBSYS "nvme-subsys"
+#define NVME_SUBSYS(obj) \
+OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
+
+#define NVME_SUBSYS_MAX_CTRLS   32
+
+typedef struct NvmeCtrl NvmeCtrl;
+typedef struct NvmeNamespace NvmeNamespace;
+typedef struct NvmeSubsystem {
+DeviceState parent_obj;
+uint8_t subnqn[256];
+} NvmeSubsystem;
+
+#endif /* NVME_SUBSYS_H */
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 21aec90637fa..aabccdf36f4b 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -25,6 +25,7 @@
  *  mdts=,zoned.append_size_limit= \
  *  -device nvme-ns,drive=,bus=,nsid=,\
  *  zoned=
+ *  -device nvme-subsys,id=
  *
  * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
  * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. By default, the
@@ -38,6 +39,8 @@
  *
  * The PMR will use BAR 4/5 exclusively.
  *
+ * To place controller(s) and namespace(s) to a subsystem, then provide
+ * nvme-subsys device as above.
  *
  * nvme device parameters
  * ~~
-- 
2.17.1




Re: [PATCH v2 01/12] hw/block/nvme: add size to mmio read/write trace events

2021-01-18 Thread Minwoo Im
Reviewed-by: Minwoo Im <minwoo.im@gmail.com>



[RFC PATCH V2 00/11] hw/block/nvme: support multi-path for ctrl/ns

2021-01-17 Thread Minwoo Im
Hello,

This patch series introduces NVMe subsystem device to support multi-path
I/O in NVMe device model.  Two use-cases are supported along with this
patch: Multi-controller, Namespace Sharing.

V1 RFC has been discussed with Klaus and Keith, I really appreciate them
for this patch series to have proper direction [1].

This patch series contains few start-up refactoring pathces from the
first to fifth patches to make nvme-ns device not to rely on the nvme
controller always.  Because nvme-ns shall be able to be mapped to the
subsystem level, not a single controller level so that it should provide
generic initialization code: nvme_ns_setup() with NvmeCtrl.  To do that,
the first five patches are to remove the NvmeCtrl * instance argument
from the nvme_ns_setup().  I'd be happy if they are picked!

For controller and namespace devices, 'subsys' property has been
introduced to map them to a subsystem.  If multi-controller needed, we
can specify 'subsys' to controllers the same.

For namespace deivice, if 'subsys' is not given just like it was, it
will have to be provided with 'bus' parameter to specify a nvme
controller device to attach, it means, they are mutual-exlusive.  To
share a namespace between or among controllers, then nvme-ns should have
'subsys' property to a single nvme subsystem instance.  To make a
namespace private one, then we need to specify 'bus' property rather
than the 'subsys'.

Of course, this series does not require any updates for the run command
for the previos users.

Plase refer the following example with nvme-cli output:

QEMU Run:
  -device nvme-subsys,id=subsys0 \
  -device nvme,serial=foo,id=nvme0,subsys=subsys0 \
  -device nvme,serial=bar,id=nvme1,subsys=subsys0 \
  -device nvme,serial=baz,id=nvme2,subsys=subsys0 \
  -device nvme-ns,id=ns1,drive=drv10,nsid=1,subsys=subsys0 \
  -device nvme-ns,id=ns2,drive=drv11,nsid=2,bus=nvme2 \
  \
  -device nvme,serial=qux,id=nvme3 \
  -device nvme-ns,id=ns3,drive=drv12,nsid=3,bus=nvme3

nvme-cli:
  root@vm:~/work# nvme list -v
  NVM Express Subsystems

  SubsystemSubsystem-NQN
Controllers
   

 
  nvme-subsys1 nqn.2019-08.org.qemu:subsys0 
nvme0, nvme1, nvme2
  nvme-subsys3 nqn.2019-08.org.qemu:qux 
nvme3

  NVM Express Controllers

  Device   SN   MN   FR 
  TxPort AddressSubsystemNamespaces
     
 -- --  
  nvme0foo  QEMU NVMe Ctrl   1.0
  pcie   :00:06.0   nvme-subsys1 nvme1n1
  nvme1bar  QEMU NVMe Ctrl   1.0
  pcie   :00:07.0   nvme-subsys1 nvme1n1
  nvme2baz  QEMU NVMe Ctrl   1.0
  pcie   :00:08.0   nvme-subsys1 nvme1n1, nvme1n2
  nvme3qux  QEMU NVMe Ctrl   1.0
  pcie   :00:09.0   nvme-subsys3

  NVM Express Namespaces

  Device   NSID Usage  Format   Controllers
    --  

  nvme1n1  1134.22  MB / 134.22  MB512   B +  0 B   nvme0, 
nvme1, nvme2
  nvme1n2  2268.44  MB / 268.44  MB512   B +  0 B   nvme2
  nvme3n1  3268.44  MB / 268.44  MB512   B +  0 B   nvme3

Summary:
  - Refactored nvme-ns device not to rely on controller during the
setup.  [1/11 - 5/11]
  - Introduced a nvme-subsys device model. [6/11]
  - Create subsystem NQN based on subsystem. [7/11]
  - Introduced multi-controller model. [8/11 - 9/11]
  - Updated namespace sharing scheme to be based on nvme-subsys
hierarchy. [10/11 - 11/11]

Since RFC V1:
  - Updated namespace sharing scheme to be based on nvme-subsys
hierarchy.

Thanks,

[1] https://lists.gnu.org/archive/html/qemu-block/2021-01/msg00425.html

Minwoo Im (11):
  hw/block/nvme: remove unused argument in nvme_ns_init_zoned
  hw/block/nvme: open code for volatile write cache
  hw/block/nvme: remove unused argument in nvme_ns_init_blk
  hw/block/nvme: split setup and register for namespace
  hw/block/nvme: remove unused argument in nvme_ns_setup
  hw/block/nvme: introduce nvme-subsys device
  hw/block/nvme: support to map controller to a subsystem
  hw/block/nvme: add CMIC enum value for Identify Controller
  hw/block/nvme: support for multi-controller in subsystem
  hw/block/nvme: add NMIC enum value for Identify Namespace
  hw/block/nvme: support for shared namespace in subsystem

 hw/block

[RFC PATCH V2 11/11] hw/block/nvme: support for shared namespace in subsystem

2021-01-17 Thread Minwoo Im
nvme-ns device is registered to a nvme controller device during the
initialization in nvme_register_namespace() in case that 'bus' property
is given which means it's mapped to a single controller.

This patch introduced a new property 'subsys' just like the controller
device instance did to map a namespace to a NVMe subsystem.

If 'subsys' property is given to the nvme-ns device, it will belong to
the specified subsystem and will be attached to all controllers in that
subsystem by enabling shared namespace capability in NMIC(Namespace
Multi-path I/O and Namespace Capabilities) in Identify Namespace.

Usage:

  -device nvme-subsys,id=subsys0
  -device nvme,serial=foo,id=nvme0,subsys=subsys0
  -device nvme,serial=bar,id=nvme1,subsys=subsys0
  -device nvme,serial=baz,id=nvme2,subsys=subsys0
  -device nvme-ns,id=ns1,drive=,nsid=1,subsys=subsys0  # Shared
  -device nvme-ns,id=ns2,drive=,nsid=2,bus=nvme2   # Non-shared

  In the above example, 'ns1' will be shared to 'nvme0' and 'nvme1' in
  the same subsystem.  On the other hand, 'ns2' will be attached to the
  'nvme2' only as a private namespace in that subsystem.

Signed-off-by: Minwoo Im <minwoo.im@gmail.com>
---
 hw/block/nvme-ns.c | 23 ++-
 hw/block/nvme-ns.h |  7 +++
 hw/block/nvme-subsys.c | 17 +
 hw/block/nvme-subsys.h |  1 +
 hw/block/nvme.c| 10 +-
 5 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index c8b75fa02138..073f65e49cac 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -63,6 +63,10 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
 
 id_ns->npda = id_ns->npdg = npdg - 1;
 
+if (nvme_ns_shared(ns)) {
+id_ns->nmic |= NVME_NMIC_NS_SHARED;
+}
+
 return 0;
 }
 
@@ -358,16 +362,25 @@ static void nvme_ns_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
-if (nvme_register_namespace(n, ns, errp)) {
-error_propagate_prepend(errp, local_err,
-"could not register namespace: ");
-return;
+if (ns->subsys) {
+if (nvme_subsys_register_ns(ns, errp)) {
+error_propagate_prepend(errp, local_err,
+"could not setup namespace to subsys: ");
+return;
+}
+} else {
+if (nvme_register_namespace(n, ns, errp)) {
+error_propagate_prepend(errp, local_err,
+"could not register namespace: ");
+return;
+}
 }
-
 }
 
 static Property nvme_ns_props[] = {
 DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
+DEFINE_PROP_LINK("subsys", NvmeNamespace, subsys, TYPE_NVME_SUBSYS,
+ NvmeSubsystem *),
 DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
 DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
 DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false),
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index d87c025b0ab6..3fc7262a7915 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -47,6 +47,8 @@ typedef struct NvmeNamespace {
 const uint32_t *iocs;
 uint8_t  csi;
 
+NvmeSubsystem   *subsys;
+
 NvmeIdNsZoned   *id_ns_zoned;
 NvmeZone*zone_array;
 QTAILQ_HEAD(, NvmeZone) exp_open_zones;
@@ -77,6 +79,11 @@ static inline uint32_t nvme_nsid(NvmeNamespace *ns)
 return -1;
 }
 
+static inline bool nvme_ns_shared(NvmeNamespace *ns)
+{
+return !!ns->subsys;
+}
+
 static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns)
 {
    NvmeIdNs *id_ns = &ns->id_ns;
diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c
index a01003136b12..d67160a0ed6f 100644
--- a/hw/block/nvme-subsys.c
+++ b/hw/block/nvme-subsys.c
@@ -43,6 +43,23 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
 return cntlid;
 }
 
+int nvme_subsys_register_ns(NvmeNamespace *ns, Error **errp)
+{
+NvmeSubsystem *subsys = ns->subsys;
+NvmeCtrl *n;
+int i;
+
+for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
+n = subsys->ctrls[i];
+
+if (n && nvme_register_namespace(n, ns, errp)) {
+return -1;
+}
+}
+
+return 0;
+}
+
 static void nvme_subsys_setup(NvmeSubsystem *subsys)
 {
 char *subnqn;
diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h
index 4eba50d96a1d..cae0fbd464e2 100644
--- a/hw/block/nvme-subsys.h
+++ b/hw/block/nvme-subsys.h
@@ -25,5 +25,6 @@ typedef struct NvmeSubsystem {
 } NvmeSubsystem;
 
 int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
+int nvme_subsys_register_ns(NvmeNamespace *ns, Error **errp);
 
 #endif /* NVME_SUBSYS_H */
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 2aeb164dd508..7869bd95b099 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -25,7 +25,8 @@
  *  mdts=,zoned.append_s

  1   2   3   >