Re: [PATCH v6 38/42] nvme: support multiple namespaces
On Tue, 2020-03-31 at 07:48 +0200, Klaus Birkelund Jensen wrote: > On Mar 25 12:59, Maxim Levitsky wrote: > > On Mon, 2020-03-16 at 07:29 -0700, Klaus Jensen wrote: > > > From: Klaus Jensen > > > > > > This adds support for multiple namespaces by introducing a new 'nvme-ns' > > > device model. The nvme device creates a bus named from the device name > > > ('id'). The nvme-ns devices then connect to this and registers > > > themselves with the nvme device. > > > > > > This changes how an nvme device is created. Example with two namespaces: > > > > > > -drive file=nvme0n1.img,if=none,id=disk1 > > > -drive file=nvme0n2.img,if=none,id=disk2 > > > -device nvme,serial=deadbeef,id=nvme0 > > > -device nvme-ns,drive=disk1,bus=nvme0,nsid=1 > > > -device nvme-ns,drive=disk2,bus=nvme0,nsid=2 > > > > > > The drive property is kept on the nvme device to keep the change > > > backward compatible, but the property is now optional. Specifying a > > > drive for the nvme device will always create the namespace with nsid 1. > > > > > > Signed-off-by: Klaus Jensen > > > Signed-off-by: Klaus Jensen > > > Reviewed-by: Keith Busch > > > --- > > > hw/block/Makefile.objs | 2 +- > > > hw/block/nvme-ns.c | 157 +++ > > > hw/block/nvme-ns.h | 60 +++ > > > hw/block/nvme.c| 233 ++--- > > > hw/block/nvme.h| 47 - > > > hw/block/trace-events | 4 +- > > > 6 files changed, 389 insertions(+), 114 deletions(-) > > > create mode 100644 hw/block/nvme-ns.c > > > create mode 100644 hw/block/nvme-ns.h > > > > > > diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs > > > index 4b4a2b338dc4..d9141d6a4b9b 100644 > > > --- a/hw/block/Makefile.objs > > > +++ b/hw/block/Makefile.objs > > > @@ -2518,9 +2561,6 @@ static void nvme_init_ctrl(NvmeCtrl *n) > > > id->psd[0].mp = cpu_to_le16(0x9c4); > > > id->psd[0].enlat = cpu_to_le32(0x10); > > > id->psd[0].exlat = cpu_to_le32(0x4); > > > -if (blk_enable_write_cache(n->conf.blk)) { > > > -id->vwc = 1; > > > -} > > > > Shouldn't that be kept? Assuming that user used the legacy 'drive' option, > > and it had no write cache enabled. > > > > When using the drive option we still end up calling the same code that > handles the "new style" namespaces and that code will handle the write > cache similary. OK. That makes sense. > > > > > > > n->bar.cap = 0; > > > NVME_CAP_SET_MQES(n->bar.cap, 0x7ff); > > > @@ -2533,25 +2573,34 @@ static void nvme_init_ctrl(NvmeCtrl *n) > > > n->bar.intmc = n->bar.intms = 0; > > > } > > > > > > -static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error > > > **errp) > > > +int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) > > > { > > > -int64_t bs_size; > > > -NvmeIdNs *id_ns = &ns->id_ns; > > > +uint32_t nsid = nvme_nsid(ns); > > > > > > -bs_size = blk_getlength(n->conf.blk); > > > -if (bs_size < 0) { > > > -error_setg_errno(errp, -bs_size, "blk_getlength"); > > > +if (nsid > NVME_MAX_NAMESPACES) { > > > +error_setg(errp, "invalid nsid (must be between 0 and %d)", > > > + NVME_MAX_NAMESPACES); > > > return -1; > > > } > > > > > > -id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; > > > -n->ns_size = bs_size; > > > +if (!nsid) { > > > +for (int i = 1; i <= n->num_namespaces; i++) { > > > +NvmeNamespace *ns = nvme_ns(n, i); > > > +if (!ns) { > > > +nsid = i; > > > +break; > > > +} > > > +} > > > > This misses an edge error case, where all the namespaces are allocated. > > Yes, it would be insane to allocate all 256 namespaces but still. > > > > Impressive catch! Fixed! Thanks! > > > > > > +} else { > > > +if (n->namespaces[nsid - 1]) { > > > +error_setg(errp, "nsid must be unique"); > > > > I''l would change that error message to something like > > "namespace id %d is already in use" or something like that. > > > > Done. > Best regards, Maxim Levitsky
Re: [PATCH v6 38/42] nvme: support multiple namespaces
On Mar 25 12:59, Maxim Levitsky wrote: > On Mon, 2020-03-16 at 07:29 -0700, Klaus Jensen wrote: > > From: Klaus Jensen > > > > This adds support for multiple namespaces by introducing a new 'nvme-ns' > > device model. The nvme device creates a bus named from the device name > > ('id'). The nvme-ns devices then connect to this and registers > > themselves with the nvme device. > > > > This changes how an nvme device is created. Example with two namespaces: > > > > -drive file=nvme0n1.img,if=none,id=disk1 > > -drive file=nvme0n2.img,if=none,id=disk2 > > -device nvme,serial=deadbeef,id=nvme0 > > -device nvme-ns,drive=disk1,bus=nvme0,nsid=1 > > -device nvme-ns,drive=disk2,bus=nvme0,nsid=2 > > > > The drive property is kept on the nvme device to keep the change > > backward compatible, but the property is now optional. Specifying a > > drive for the nvme device will always create the namespace with nsid 1. > > > > Signed-off-by: Klaus Jensen > > Signed-off-by: Klaus Jensen > > Reviewed-by: Keith Busch > > --- > > hw/block/Makefile.objs | 2 +- > > hw/block/nvme-ns.c | 157 +++ > > hw/block/nvme-ns.h | 60 +++ > > hw/block/nvme.c| 233 ++--- > > hw/block/nvme.h| 47 - > > hw/block/trace-events | 4 +- > > 6 files changed, 389 insertions(+), 114 deletions(-) > > create mode 100644 hw/block/nvme-ns.c > > create mode 100644 hw/block/nvme-ns.h > > > > diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs > > index 4b4a2b338dc4..d9141d6a4b9b 100644 > > --- a/hw/block/Makefile.objs > > +++ b/hw/block/Makefile.objs > > @@ -2518,9 +2561,6 @@ static void nvme_init_ctrl(NvmeCtrl *n) > > id->psd[0].mp = cpu_to_le16(0x9c4); > > id->psd[0].enlat = cpu_to_le32(0x10); > > id->psd[0].exlat = cpu_to_le32(0x4); > > -if (blk_enable_write_cache(n->conf.blk)) { > > -id->vwc = 1; > > -} > Shouldn't that be kept? Assuming that user used the legacy 'drive' option, > and it had no write cache enabled. > When using the drive option we still end up calling the same code that handles the "new style" namespaces and that code will handle the write cache similary. > > > > n->bar.cap = 0; > > NVME_CAP_SET_MQES(n->bar.cap, 0x7ff); > > @@ -2533,25 +2573,34 @@ static void nvme_init_ctrl(NvmeCtrl *n) > > n->bar.intmc = n->bar.intms = 0; > > } > > > > -static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error > > **errp) > > +int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) > > { > > -int64_t bs_size; > > -NvmeIdNs *id_ns = &ns->id_ns; > > +uint32_t nsid = nvme_nsid(ns); > > > > -bs_size = blk_getlength(n->conf.blk); > > -if (bs_size < 0) { > > -error_setg_errno(errp, -bs_size, "blk_getlength"); > > +if (nsid > NVME_MAX_NAMESPACES) { > > +error_setg(errp, "invalid nsid (must be between 0 and %d)", > > + NVME_MAX_NAMESPACES); > > return -1; > > } > > > > -id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; > > -n->ns_size = bs_size; > > +if (!nsid) { > > +for (int i = 1; i <= n->num_namespaces; i++) { > > +NvmeNamespace *ns = nvme_ns(n, i); > > +if (!ns) { > > +nsid = i; > > +break; > > +} > > +} > This misses an edge error case, where all the namespaces are allocated. > Yes, it would be insane to allocate all 256 namespaces but still. > Impressive catch! Fixed! > > > +} else { > > +if (n->namespaces[nsid - 1]) { > > +error_setg(errp, "nsid must be unique"); > > I''l would change that error message to something like > "namespace id %d is already in use" or something like that. > Done.
Re: [PATCH v6 38/42] nvme: support multiple namespaces
On Mon, 2020-03-16 at 07:29 -0700, Klaus Jensen wrote: > From: Klaus Jensen > > This adds support for multiple namespaces by introducing a new 'nvme-ns' > device model. The nvme device creates a bus named from the device name > ('id'). The nvme-ns devices then connect to this and registers > themselves with the nvme device. > > This changes how an nvme device is created. Example with two namespaces: > > -drive file=nvme0n1.img,if=none,id=disk1 > -drive file=nvme0n2.img,if=none,id=disk2 > -device nvme,serial=deadbeef,id=nvme0 > -device nvme-ns,drive=disk1,bus=nvme0,nsid=1 > -device nvme-ns,drive=disk2,bus=nvme0,nsid=2 > > The drive property is kept on the nvme device to keep the change > backward compatible, but the property is now optional. Specifying a > drive for the nvme device will always create the namespace with nsid 1. > > Signed-off-by: Klaus Jensen > Signed-off-by: Klaus Jensen > Reviewed-by: Keith Busch > --- > hw/block/Makefile.objs | 2 +- > hw/block/nvme-ns.c | 157 +++ > hw/block/nvme-ns.h | 60 +++ > hw/block/nvme.c| 233 ++--- > hw/block/nvme.h| 47 - > hw/block/trace-events | 4 +- > 6 files changed, 389 insertions(+), 114 deletions(-) > create mode 100644 hw/block/nvme-ns.c > create mode 100644 hw/block/nvme-ns.h > > diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs > index 4b4a2b338dc4..d9141d6a4b9b 100644 > --- a/hw/block/Makefile.objs > +++ b/hw/block/Makefile.objs > @@ -7,7 +7,7 @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o > common-obj-$(CONFIG_XEN) += xen-block.o > common-obj-$(CONFIG_ECC) += ecc.o > common-obj-$(CONFIG_ONENAND) += onenand.o > -common-obj-$(CONFIG_NVME_PCI) += nvme.o > +common-obj-$(CONFIG_NVME_PCI) += nvme.o nvme-ns.o > common-obj-$(CONFIG_SWIM) += swim.o > > common-obj-$(CONFIG_SH4) += tc58128.o > diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c > new file mode 100644 > index ..6d975104171d > --- /dev/null > +++ b/hw/block/nvme-ns.c > @@ -0,0 +1,157 @@ > +#include "qemu/osdep.h" > +#include "qemu/units.h" > +#include "qemu/cutils.h" > +#include "qemu/log.h" > +#include "hw/block/block.h" > +#include "hw/pci/pci.h" > +#include "sysemu/sysemu.h" > +#include "sysemu/block-backend.h" > +#include "qapi/error.h" > + > +#include "hw/qdev-properties.h" > +#include "hw/qdev-core.h" > + > +#include "nvme.h" > +#include "nvme-ns.h" > + > +static int nvme_ns_init(NvmeNamespace *ns) > +{ > +NvmeIdNs *id_ns = &ns->id_ns; > + > +id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; > +id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns)); > + > +/* no thin provisioning */ > +id_ns->ncap = id_ns->nsze; > +id_ns->nuse = id_ns->ncap; > + > +return 0; > +} Looks great! > + > +static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, NvmeIdCtrl *id, > +Error **errp) > +{ > +uint64_t perm, shared_perm; > + > +Error *local_err = NULL; > +int ret; > + > +perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; > +shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | > +BLK_PERM_GRAPH_MOD; > + > +ret = blk_set_perm(ns->blk, perm, shared_perm, &local_err); > +if (ret) { > +error_propagate_prepend(errp, local_err, > +"could not set block permissions: "); > +return ret; > +} > + > +ns->size = blk_getlength(ns->blk); > +if (ns->size < 0) { > +error_setg_errno(errp, -ns->size, "could not get blockdev size"); > +return -1; > +} > + > +switch (n->conf.wce) { > +case ON_OFF_AUTO_ON: > +n->features.volatile_wc = 1; > +break; > +case ON_OFF_AUTO_OFF: > +n->features.volatile_wc = 0; > +case ON_OFF_AUTO_AUTO: > +n->features.volatile_wc = blk_enable_write_cache(ns->blk); > +break; > +default: > +abort(); > +} > + > +blk_set_enable_write_cache(ns->blk, n->features.volatile_wc); > + > +return 0; > +} This needs review from someone that knows the block layer better that I do. I still think that maybe you can somehow use the blkconf_apply_backend_options (or even extend it to suit you somehow). I'll leave this to the block layer folks. > + > +static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) > +{ > +if (!ns->blk) { > +error_setg(errp, "block backend not configured"); > +return 1; > +} > + > +return 0; > +} > + > +int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) > +{ > +if (nvme_ns_check_constraints(ns, errp)) { > +return -1; > +} > + > +if (nvme_ns_init_blk(n, ns, &n->id_ctrl, errp)) { > +return -1; > +} > + > +nvme_ns_init(ns); > +if (nvme_register_namespace(n, ns, errp)) { > +return -1; > +} > + > +return 0; > +} > + > +static void nvme_ns_realize(DeviceState *dev, Error **errp)
[PATCH v6 38/42] nvme: support multiple namespaces
From: Klaus Jensen This adds support for multiple namespaces by introducing a new 'nvme-ns' device model. The nvme device creates a bus named from the device name ('id'). The nvme-ns devices then connect to this and registers themselves with the nvme device. This changes how an nvme device is created. Example with two namespaces: -drive file=nvme0n1.img,if=none,id=disk1 -drive file=nvme0n2.img,if=none,id=disk2 -device nvme,serial=deadbeef,id=nvme0 -device nvme-ns,drive=disk1,bus=nvme0,nsid=1 -device nvme-ns,drive=disk2,bus=nvme0,nsid=2 The drive property is kept on the nvme device to keep the change backward compatible, but the property is now optional. Specifying a drive for the nvme device will always create the namespace with nsid 1. Signed-off-by: Klaus Jensen Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch --- hw/block/Makefile.objs | 2 +- hw/block/nvme-ns.c | 157 +++ hw/block/nvme-ns.h | 60 +++ hw/block/nvme.c| 233 ++--- hw/block/nvme.h| 47 - hw/block/trace-events | 4 +- 6 files changed, 389 insertions(+), 114 deletions(-) create mode 100644 hw/block/nvme-ns.c create mode 100644 hw/block/nvme-ns.h diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs index 4b4a2b338dc4..d9141d6a4b9b 100644 --- a/hw/block/Makefile.objs +++ b/hw/block/Makefile.objs @@ -7,7 +7,7 @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o common-obj-$(CONFIG_XEN) += xen-block.o common-obj-$(CONFIG_ECC) += ecc.o common-obj-$(CONFIG_ONENAND) += onenand.o -common-obj-$(CONFIG_NVME_PCI) += nvme.o +common-obj-$(CONFIG_NVME_PCI) += nvme.o nvme-ns.o common-obj-$(CONFIG_SWIM) += swim.o common-obj-$(CONFIG_SH4) += tc58128.o diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c new file mode 100644 index ..6d975104171d --- /dev/null +++ b/hw/block/nvme-ns.c @@ -0,0 +1,157 @@ +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/cutils.h" +#include "qemu/log.h" +#include "hw/block/block.h" +#include "hw/pci/pci.h" +#include "sysemu/sysemu.h" +#include "sysemu/block-backend.h" +#include "qapi/error.h" + +#include "hw/qdev-properties.h" +#include "hw/qdev-core.h" + +#include "nvme.h" +#include "nvme-ns.h" + +static int nvme_ns_init(NvmeNamespace *ns) +{ +NvmeIdNs *id_ns = &ns->id_ns; + +id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; +id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns)); + +/* no thin provisioning */ +id_ns->ncap = id_ns->nsze; +id_ns->nuse = id_ns->ncap; + +return 0; +} + +static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, NvmeIdCtrl *id, +Error **errp) +{ +uint64_t perm, shared_perm; + +Error *local_err = NULL; +int ret; + +perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; +shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | +BLK_PERM_GRAPH_MOD; + +ret = blk_set_perm(ns->blk, perm, shared_perm, &local_err); +if (ret) { +error_propagate_prepend(errp, local_err, +"could not set block permissions: "); +return ret; +} + +ns->size = blk_getlength(ns->blk); +if (ns->size < 0) { +error_setg_errno(errp, -ns->size, "could not get blockdev size"); +return -1; +} + +switch (n->conf.wce) { +case ON_OFF_AUTO_ON: +n->features.volatile_wc = 1; +break; +case ON_OFF_AUTO_OFF: +n->features.volatile_wc = 0; +case ON_OFF_AUTO_AUTO: +n->features.volatile_wc = blk_enable_write_cache(ns->blk); +break; +default: +abort(); +} + +blk_set_enable_write_cache(ns->blk, n->features.volatile_wc); + +return 0; +} + +static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) +{ +if (!ns->blk) { +error_setg(errp, "block backend not configured"); +return 1; +} + +return 0; +} + +int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) +{ +if (nvme_ns_check_constraints(ns, errp)) { +return -1; +} + +if (nvme_ns_init_blk(n, ns, &n->id_ctrl, errp)) { +return -1; +} + +nvme_ns_init(ns); +if (nvme_register_namespace(n, ns, errp)) { +return -1; +} + +return 0; +} + +static void nvme_ns_realize(DeviceState *dev, Error **errp) +{ +NvmeNamespace *ns = NVME_NS(dev); +BusState *s = qdev_get_parent_bus(dev); +NvmeCtrl *n = NVME(s->parent); +Error *local_err = NULL; + +if (nvme_ns_setup(n, ns, &local_err)) { +error_propagate_prepend(errp, local_err, +"could not setup namespace: "); +return; +} +} + +static Property nvme_ns_props[] = { +DEFINE_NVME_NS_PROPERTIES(NvmeNamespace, params), +DEFINE_PROP_END_OF_LIST(), +}; + +static void nvme_ns_class_init(ObjectClass *oc, void *data) +{ +DeviceClass *dc = DEVICE_CLASS(oc); + +set_bit(DEVICE_CATEGORY_STORAGE,