Re: [PATCH v6 38/42] nvme: support multiple namespaces

2020-03-31 Thread Maxim Levitsky
On Tue, 2020-03-31 at 07:48 +0200, Klaus Birkelund Jensen wrote:
> On Mar 25 12:59, Maxim Levitsky wrote:
> > On Mon, 2020-03-16 at 07:29 -0700, Klaus Jensen wrote:
> > > From: Klaus Jensen 
> > > 
> > > This adds support for multiple namespaces by introducing a new 'nvme-ns'
> > > device model. The nvme device creates a bus named from the device name
> > > ('id'). The nvme-ns devices then connect to this and registers
> > > themselves with the nvme device.
> > > 
> > > This changes how an nvme device is created. Example with two namespaces:
> > > 
> > >   -drive file=nvme0n1.img,if=none,id=disk1
> > >   -drive file=nvme0n2.img,if=none,id=disk2
> > >   -device nvme,serial=deadbeef,id=nvme0
> > >   -device nvme-ns,drive=disk1,bus=nvme0,nsid=1
> > >   -device nvme-ns,drive=disk2,bus=nvme0,nsid=2
> > > 
> > > The drive property is kept on the nvme device to keep the change
> > > backward compatible, but the property is now optional. Specifying a
> > > drive for the nvme device will always create the namespace with nsid 1.
> > > 
> > > Signed-off-by: Klaus Jensen 
> > > Signed-off-by: Klaus Jensen 
> > > Reviewed-by: Keith Busch 
> > > ---
> > >  hw/block/Makefile.objs |   2 +-
> > >  hw/block/nvme-ns.c | 157 +++
> > >  hw/block/nvme-ns.h |  60 +++
> > >  hw/block/nvme.c| 233 ++---
> > >  hw/block/nvme.h|  47 -
> > >  hw/block/trace-events  |   4 +-
> > >  6 files changed, 389 insertions(+), 114 deletions(-)
> > >  create mode 100644 hw/block/nvme-ns.c
> > >  create mode 100644 hw/block/nvme-ns.h
> > > 
> > > diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
> > > index 4b4a2b338dc4..d9141d6a4b9b 100644
> > > --- a/hw/block/Makefile.objs
> > > +++ b/hw/block/Makefile.objs
> > > @@ -2518,9 +2561,6 @@ static void nvme_init_ctrl(NvmeCtrl *n)
> > >  id->psd[0].mp = cpu_to_le16(0x9c4);
> > >  id->psd[0].enlat = cpu_to_le32(0x10);
> > >  id->psd[0].exlat = cpu_to_le32(0x4);
> > > -if (blk_enable_write_cache(n->conf.blk)) {
> > > -id->vwc = 1;
> > > -}
> > 
> > Shouldn't that be kept? Assuming that user used the legacy 'drive' option,
> > and it had no write cache enabled.
> > 
> 
> When using the drive option we still end up calling the same code that
> handles the "new style" namespaces and that code will handle the write
> cache similary.
OK. That makes sense.

> 
> > >  
> > >  n->bar.cap = 0;
> > >  NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
> > > @@ -2533,25 +2573,34 @@ static void nvme_init_ctrl(NvmeCtrl *n)
> > >  n->bar.intmc = n->bar.intms = 0;
> > >  }
> > >  
> > > -static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error 
> > > **errp)
> > > +int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
> > >  {
> > > -int64_t bs_size;
> > > -NvmeIdNs *id_ns = >id_ns;
> > > +uint32_t nsid = nvme_nsid(ns);
> > >  
> > > -bs_size = blk_getlength(n->conf.blk);
> > > -if (bs_size < 0) {
> > > -error_setg_errno(errp, -bs_size, "blk_getlength");
> > > +if (nsid > NVME_MAX_NAMESPACES) {
> > > +error_setg(errp, "invalid nsid (must be between 0 and %d)",
> > > +   NVME_MAX_NAMESPACES);
> > >  return -1;
> > >  }
> > >  
> > > -id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
> > > -n->ns_size = bs_size;
> > > +if (!nsid) {
> > > +for (int i = 1; i <= n->num_namespaces; i++) {
> > > +NvmeNamespace *ns = nvme_ns(n, i);
> > > +if (!ns) {
> > > +nsid = i;
> > > +break;
> > > +}
> > > +}
> > 
> > This misses an edge error case, where all the namespaces are allocated.
> > Yes, it would be insane to allocate all 256 namespaces but still.
> > 
> 
> Impressive catch! Fixed!
Thanks!
> 
> > 
> > > +} else {
> > > +if (n->namespaces[nsid - 1]) {
> > > +error_setg(errp, "nsid must be unique");
> > 
> > I''l would change that error message to something like 
> > "namespace id %d is already in use" or something like that.
> > 
> 
> Done.
> 

Best regards,
Maxim Levitsky




Re: [PATCH v6 38/42] nvme: support multiple namespaces

2020-03-30 Thread Klaus Birkelund Jensen
On Mar 25 12:59, Maxim Levitsky wrote:
> On Mon, 2020-03-16 at 07:29 -0700, Klaus Jensen wrote:
> > From: Klaus Jensen 
> > 
> > This adds support for multiple namespaces by introducing a new 'nvme-ns'
> > device model. The nvme device creates a bus named from the device name
> > ('id'). The nvme-ns devices then connect to this and registers
> > themselves with the nvme device.
> > 
> > This changes how an nvme device is created. Example with two namespaces:
> > 
> >   -drive file=nvme0n1.img,if=none,id=disk1
> >   -drive file=nvme0n2.img,if=none,id=disk2
> >   -device nvme,serial=deadbeef,id=nvme0
> >   -device nvme-ns,drive=disk1,bus=nvme0,nsid=1
> >   -device nvme-ns,drive=disk2,bus=nvme0,nsid=2
> > 
> > The drive property is kept on the nvme device to keep the change
> > backward compatible, but the property is now optional. Specifying a
> > drive for the nvme device will always create the namespace with nsid 1.
> > 
> > Signed-off-by: Klaus Jensen 
> > Signed-off-by: Klaus Jensen 
> > Reviewed-by: Keith Busch 
> > ---
> >  hw/block/Makefile.objs |   2 +-
> >  hw/block/nvme-ns.c | 157 +++
> >  hw/block/nvme-ns.h |  60 +++
> >  hw/block/nvme.c| 233 ++---
> >  hw/block/nvme.h|  47 -
> >  hw/block/trace-events  |   4 +-
> >  6 files changed, 389 insertions(+), 114 deletions(-)
> >  create mode 100644 hw/block/nvme-ns.c
> >  create mode 100644 hw/block/nvme-ns.h
> > 
> > diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
> > index 4b4a2b338dc4..d9141d6a4b9b 100644
> > --- a/hw/block/Makefile.objs
> > +++ b/hw/block/Makefile.objs

> > @@ -2518,9 +2561,6 @@ static void nvme_init_ctrl(NvmeCtrl *n)
> >  id->psd[0].mp = cpu_to_le16(0x9c4);
> >  id->psd[0].enlat = cpu_to_le32(0x10);
> >  id->psd[0].exlat = cpu_to_le32(0x4);
> > -if (blk_enable_write_cache(n->conf.blk)) {
> > -id->vwc = 1;
> > -}
> Shouldn't that be kept? Assuming that user used the legacy 'drive' option,
> and it had no write cache enabled.
> 

When using the drive option we still end up calling the same code that
handles the "new style" namespaces and that code will handle the write
cache similary.

> >  
> >  n->bar.cap = 0;
> >  NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
> > @@ -2533,25 +2573,34 @@ static void nvme_init_ctrl(NvmeCtrl *n)
> >  n->bar.intmc = n->bar.intms = 0;
> >  }
> >  
> > -static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error 
> > **errp)
> > +int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
> >  {
> > -int64_t bs_size;
> > -NvmeIdNs *id_ns = >id_ns;
> > +uint32_t nsid = nvme_nsid(ns);
> >  
> > -bs_size = blk_getlength(n->conf.blk);
> > -if (bs_size < 0) {
> > -error_setg_errno(errp, -bs_size, "blk_getlength");
> > +if (nsid > NVME_MAX_NAMESPACES) {
> > +error_setg(errp, "invalid nsid (must be between 0 and %d)",
> > +   NVME_MAX_NAMESPACES);
> >  return -1;
> >  }
> >  
> > -id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
> > -n->ns_size = bs_size;
> > +if (!nsid) {
> > +for (int i = 1; i <= n->num_namespaces; i++) {
> > +NvmeNamespace *ns = nvme_ns(n, i);
> > +if (!ns) {
> > +nsid = i;
> > +break;
> > +}
> > +}
> This misses an edge error case, where all the namespaces are allocated.
> Yes, it would be insane to allocate all 256 namespaces but still.
> 

Impressive catch! Fixed!

> 
> > +} else {
> > +if (n->namespaces[nsid - 1]) {
> > +error_setg(errp, "nsid must be unique");
> 
> I''l would change that error message to something like 
> "namespace id %d is already in use" or something like that.
> 

Done.




Re: [PATCH v6 38/42] nvme: support multiple namespaces

2020-03-25 Thread Maxim Levitsky
On Mon, 2020-03-16 at 07:29 -0700, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> This adds support for multiple namespaces by introducing a new 'nvme-ns'
> device model. The nvme device creates a bus named from the device name
> ('id'). The nvme-ns devices then connect to this and registers
> themselves with the nvme device.
> 
> This changes how an nvme device is created. Example with two namespaces:
> 
>   -drive file=nvme0n1.img,if=none,id=disk1
>   -drive file=nvme0n2.img,if=none,id=disk2
>   -device nvme,serial=deadbeef,id=nvme0
>   -device nvme-ns,drive=disk1,bus=nvme0,nsid=1
>   -device nvme-ns,drive=disk2,bus=nvme0,nsid=2
> 
> The drive property is kept on the nvme device to keep the change
> backward compatible, but the property is now optional. Specifying a
> drive for the nvme device will always create the namespace with nsid 1.
> 
> Signed-off-by: Klaus Jensen 
> Signed-off-by: Klaus Jensen 
> Reviewed-by: Keith Busch 
> ---
>  hw/block/Makefile.objs |   2 +-
>  hw/block/nvme-ns.c | 157 +++
>  hw/block/nvme-ns.h |  60 +++
>  hw/block/nvme.c| 233 ++---
>  hw/block/nvme.h|  47 -
>  hw/block/trace-events  |   4 +-
>  6 files changed, 389 insertions(+), 114 deletions(-)
>  create mode 100644 hw/block/nvme-ns.c
>  create mode 100644 hw/block/nvme-ns.h
> 
> diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
> index 4b4a2b338dc4..d9141d6a4b9b 100644
> --- a/hw/block/Makefile.objs
> +++ b/hw/block/Makefile.objs
> @@ -7,7 +7,7 @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
>  common-obj-$(CONFIG_XEN) += xen-block.o
>  common-obj-$(CONFIG_ECC) += ecc.o
>  common-obj-$(CONFIG_ONENAND) += onenand.o
> -common-obj-$(CONFIG_NVME_PCI) += nvme.o
> +common-obj-$(CONFIG_NVME_PCI) += nvme.o nvme-ns.o
>  common-obj-$(CONFIG_SWIM) += swim.o
>  
>  common-obj-$(CONFIG_SH4) += tc58128.o
> diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
> new file mode 100644
> index ..6d975104171d
> --- /dev/null
> +++ b/hw/block/nvme-ns.c
> @@ -0,0 +1,157 @@
> +#include "qemu/osdep.h"
> +#include "qemu/units.h"
> +#include "qemu/cutils.h"
> +#include "qemu/log.h"
> +#include "hw/block/block.h"
> +#include "hw/pci/pci.h"
> +#include "sysemu/sysemu.h"
> +#include "sysemu/block-backend.h"
> +#include "qapi/error.h"
> +
> +#include "hw/qdev-properties.h"
> +#include "hw/qdev-core.h"
> +
> +#include "nvme.h"
> +#include "nvme-ns.h"
> +
> +static int nvme_ns_init(NvmeNamespace *ns)
> +{
> +NvmeIdNs *id_ns = >id_ns;
> +
> +id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
> +id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns));
> +
> +/* no thin provisioning */
> +id_ns->ncap = id_ns->nsze;
> +id_ns->nuse = id_ns->ncap;
> +
> +return 0;
> +}
Looks great!

> +
> +static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, NvmeIdCtrl *id,
> +Error **errp)
> +{
> +uint64_t perm, shared_perm;
> +
> +Error *local_err = NULL;
> +int ret;
> +
> +perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
> +shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
> +BLK_PERM_GRAPH_MOD;
> +
> +ret = blk_set_perm(ns->blk, perm, shared_perm, _err);
> +if (ret) {
> +error_propagate_prepend(errp, local_err,
> +"could not set block permissions: ");
> +return ret;
> +}
> +
> +ns->size = blk_getlength(ns->blk);
> +if (ns->size < 0) {
> +error_setg_errno(errp, -ns->size, "could not get blockdev size");
> +return -1;
> +}
> +
> +switch (n->conf.wce) {
> +case ON_OFF_AUTO_ON:
> +n->features.volatile_wc = 1;
> +break;
> +case ON_OFF_AUTO_OFF:
> +n->features.volatile_wc = 0;
> +case ON_OFF_AUTO_AUTO:
> +n->features.volatile_wc = blk_enable_write_cache(ns->blk);
> +break;
> +default:
> +abort();
> +}
> +
> +blk_set_enable_write_cache(ns->blk, n->features.volatile_wc);
> +
> +return 0;
> +}

This needs review from someone that knows the block layer better that I do.
I still think that maybe you can somehow use the blkconf_apply_backend_options
(or even extend it to suit you somehow). I'll leave this to the block layer 
folks.



> +
> +static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
> +{
> +if (!ns->blk) {
> +error_setg(errp, "block backend not configured");
> +return 1;
> +}
> +
> +return 0;
> +}
> +
> +int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
> +{
> +if (nvme_ns_check_constraints(ns, errp)) {
> +return -1;
> +}
> +
> +if (nvme_ns_init_blk(n, ns, >id_ctrl, errp)) {
> +return -1;
> +}
> +
> +nvme_ns_init(ns);
> +if (nvme_register_namespace(n, ns, errp)) {
> +return -1;
> +}
> +
> +return 0;
> +}
> +
> +static void nvme_ns_realize(DeviceState *dev, Error **errp)
> +{
> +

[PATCH v6 38/42] nvme: support multiple namespaces

2020-03-16 Thread Klaus Jensen
From: Klaus Jensen 

This adds support for multiple namespaces by introducing a new 'nvme-ns'
device model. The nvme device creates a bus named from the device name
('id'). The nvme-ns devices then connect to this and registers
themselves with the nvme device.

This changes how an nvme device is created. Example with two namespaces:

  -drive file=nvme0n1.img,if=none,id=disk1
  -drive file=nvme0n2.img,if=none,id=disk2
  -device nvme,serial=deadbeef,id=nvme0
  -device nvme-ns,drive=disk1,bus=nvme0,nsid=1
  -device nvme-ns,drive=disk2,bus=nvme0,nsid=2

The drive property is kept on the nvme device to keep the change
backward compatible, but the property is now optional. Specifying a
drive for the nvme device will always create the namespace with nsid 1.

Signed-off-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
Reviewed-by: Keith Busch 
---
 hw/block/Makefile.objs |   2 +-
 hw/block/nvme-ns.c | 157 +++
 hw/block/nvme-ns.h |  60 +++
 hw/block/nvme.c| 233 ++---
 hw/block/nvme.h|  47 -
 hw/block/trace-events  |   4 +-
 6 files changed, 389 insertions(+), 114 deletions(-)
 create mode 100644 hw/block/nvme-ns.c
 create mode 100644 hw/block/nvme-ns.h

diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
index 4b4a2b338dc4..d9141d6a4b9b 100644
--- a/hw/block/Makefile.objs
+++ b/hw/block/Makefile.objs
@@ -7,7 +7,7 @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
 common-obj-$(CONFIG_XEN) += xen-block.o
 common-obj-$(CONFIG_ECC) += ecc.o
 common-obj-$(CONFIG_ONENAND) += onenand.o
-common-obj-$(CONFIG_NVME_PCI) += nvme.o
+common-obj-$(CONFIG_NVME_PCI) += nvme.o nvme-ns.o
 common-obj-$(CONFIG_SWIM) += swim.o
 
 common-obj-$(CONFIG_SH4) += tc58128.o
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
new file mode 100644
index ..6d975104171d
--- /dev/null
+++ b/hw/block/nvme-ns.c
@@ -0,0 +1,157 @@
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/cutils.h"
+#include "qemu/log.h"
+#include "hw/block/block.h"
+#include "hw/pci/pci.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/qdev-core.h"
+
+#include "nvme.h"
+#include "nvme-ns.h"
+
+static int nvme_ns_init(NvmeNamespace *ns)
+{
+NvmeIdNs *id_ns = >id_ns;
+
+id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
+id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns));
+
+/* no thin provisioning */
+id_ns->ncap = id_ns->nsze;
+id_ns->nuse = id_ns->ncap;
+
+return 0;
+}
+
+static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, NvmeIdCtrl *id,
+Error **errp)
+{
+uint64_t perm, shared_perm;
+
+Error *local_err = NULL;
+int ret;
+
+perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
+shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+BLK_PERM_GRAPH_MOD;
+
+ret = blk_set_perm(ns->blk, perm, shared_perm, _err);
+if (ret) {
+error_propagate_prepend(errp, local_err,
+"could not set block permissions: ");
+return ret;
+}
+
+ns->size = blk_getlength(ns->blk);
+if (ns->size < 0) {
+error_setg_errno(errp, -ns->size, "could not get blockdev size");
+return -1;
+}
+
+switch (n->conf.wce) {
+case ON_OFF_AUTO_ON:
+n->features.volatile_wc = 1;
+break;
+case ON_OFF_AUTO_OFF:
+n->features.volatile_wc = 0;
+case ON_OFF_AUTO_AUTO:
+n->features.volatile_wc = blk_enable_write_cache(ns->blk);
+break;
+default:
+abort();
+}
+
+blk_set_enable_write_cache(ns->blk, n->features.volatile_wc);
+
+return 0;
+}
+
+static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
+{
+if (!ns->blk) {
+error_setg(errp, "block backend not configured");
+return 1;
+}
+
+return 0;
+}
+
+int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+{
+if (nvme_ns_check_constraints(ns, errp)) {
+return -1;
+}
+
+if (nvme_ns_init_blk(n, ns, >id_ctrl, errp)) {
+return -1;
+}
+
+nvme_ns_init(ns);
+if (nvme_register_namespace(n, ns, errp)) {
+return -1;
+}
+
+return 0;
+}
+
+static void nvme_ns_realize(DeviceState *dev, Error **errp)
+{
+NvmeNamespace *ns = NVME_NS(dev);
+BusState *s = qdev_get_parent_bus(dev);
+NvmeCtrl *n = NVME(s->parent);
+Error *local_err = NULL;
+
+if (nvme_ns_setup(n, ns, _err)) {
+error_propagate_prepend(errp, local_err,
+"could not setup namespace: ");
+return;
+}
+}
+
+static Property nvme_ns_props[] = {
+DEFINE_NVME_NS_PROPERTIES(NvmeNamespace, params),
+DEFINE_PROP_END_OF_LIST(),
+};
+
+static void nvme_ns_class_init(ObjectClass *oc, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(oc);
+
+set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+