[PATCH v2 12/20] libnd, nd_acpi: add interleave-set state-tracking infrastructure

2015-04-28 Thread Dan Williams
On platforms that have firmware support for reading/writing per-dimm
label space, a portion of the dimm may be accessible via an interleave
set PMEM mapping in addition to the dimm's BLK (block-data-window
aperture(s)) interface.  A label, stored in a "configuration data
region" on the dimm, disambiguates which dimm addresses are accessed
through which exclusive interface.

Add infrastructure that allows the kernel to block modifications to a
label in the set while any member dimm is active.  Note that this is
meant only for enforcing "no modifications of active labels" via the
coarse ioctl command.  Adding/deleting namespaces from an active
interleave set will only be possible via sysfs.

Another aspect of tracking interleave sets is tracking their integrity
when DIMMs in a set are physically re-ordered.  For this purpose we
generate an "interleave-set cookie" that can be recorded in a label and
validated against the current configuration.  It is the bus provider
implementation's responsibility to calculate the interleave set cookie
and attach it to a given region.

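Cc: Neil Brown <ne...@suse.de>
Cc: <linux-a...@vger.kernel.org>
Cc: Greg KH <gre...@linuxfoundation.org>
Cc: Robert Moore <robert.mo...@intel.com>
Cc: Rafael J. Wysocki <rafael.j.wyso...@intel.com>
Signed-off-by: Dan Williams <dan.j.willi...@intel.com>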
---
 drivers/block/nd/acpi.c|   90 
 drivers/block/nd/bus.c |   41 ++
 drivers/block/nd/core.c|   47 +
 drivers/block/nd/dimm_devs.c   |   19 
 drivers/block/nd/libnd.h   |6 +++
 drivers/block/nd/nd-private.h  |   11 -
 drivers/block/nd/nd.h  |4 ++
 drivers/block/nd/region_devs.c |   85 ++
 8 files changed, 299 insertions(+), 4 deletions(-)

diff --git a/drivers/block/nd/acpi.c b/drivers/block/nd/acpi.c
index c3dda74f73d7..d34cefe38e2f 100644
--- a/drivers/block/nd/acpi.c
+++ b/drivers/block/nd/acpi.c
@@ -15,6 +15,7 @@
 #include <linux/ndctl.h>
 #include <linux/list.h>
 #include <linux/acpi.h>
+#include <linux/sort.h>
 #include "acpi_nfit.h"
 #include "libnd.h"
 
@@ -779,6 +780,90 @@ static const struct attribute_group 
*nd_acpi_region_attribute_groups[] = {
NULL,
 };
 
+/* enough info to uniquely specify an interleave set: one entry per mapping */
+struct nfit_set_info {
+   struct nfit_set_info_map {
+   u64 region_spa_offset; /* copied from memdev->region_spa_offset */
+   u32 serial_number; /* copied from nfit_mem->dcr->serial_number */
+   u32 pad; /* never assigned in the visible code; NOTE(review): presumably stays zero from the kzalloc'd buffer - confirm */
+   } mapping[0]; /* trailing array; sizeof_nfit_set_info() accounts for its entries */
+};
+
+/* bytes needed for an nfit_set_info carrying num_mappings map entries */
+static size_t sizeof_nfit_set_info(int num_mappings)
+{
+	size_t entry_size = sizeof(struct nfit_set_info_map);
+
+	return sizeof(struct nfit_set_info) + num_mappings * entry_size;
+}
+
+/*
+ * sort() comparator for nfit_set_info_map entries, keyed on
+ * region_spa_offset.  memcmp() orders by raw bytes, which is not numeric
+ * order on little-endian cpus; NOTE(review): presumably sufficient because
+ * the sorted array only feeds the cookie checksum -- confirm.
+ */
+static int cmp_map(const void *m0, const void *m1)
+{
+	const struct nfit_set_info_map *map0 = m0;
+	const struct nfit_set_info_map *map1 = m1;
+
+	return memcmp(&map0->region_spa_offset, &map1->region_spa_offset,
+			sizeof(u64));
+}
+
+/*
+ * Retrieve the nth (0-based) memdev entry referencing this spa: walk
+ * acpi_desc->memdevs in list order, skipping the first n entries whose
+ * spa_index matches.  Returns NULL when fewer than n + 1 entries match.
+ */
+static struct acpi_nfit_memdev *memdev_from_spa(
+		struct acpi_nfit_desc *acpi_desc, u16 spa_index, int n)
+{
+	struct nfit_memdev *nfit_memdev;
+
+	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list)
+		if (nfit_memdev->memdev->spa_index == spa_index)
+			if (n-- == 0)
+				return nfit_memdev->memdev;
+	return NULL;
+}
+
+static int nd_acpi_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
+   struct nd_region_desc *ndr_desc, struct acpi_nfit_spa *spa)
+{
+   u16 num_mappings = ndr_desc->num_mappings;
+   int i, spa_type = nfit_spa_type(spa);
+   struct device *dev = acpi_desc->dev;
+   struct nd_interleave_set *nd_set;
+   struct nfit_set_info *info;
+
+   if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE)
+   /* pass */;
+   else
+   return 0;
+
+   nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
+   if (!nd_set)
+   return -ENOMEM;
+
+   info = devm_kzalloc(dev, sizeof_nfit_set_info(num_mappings), 
GFP_KERNEL);
+   if (!info)
+   return -ENOMEM;
+   for (i = 0; i < num_mappings; i++) {
+   struct nd_mapping *nd_mapping = _desc->nd_mapping[i];
+   struct nfit_set_info_map *map = >mapping[i];
+   struct nd_dimm *nd_dimm = nd_mapping->nd_dimm;
+   struct nfit_mem *nfit_mem = nd_dimm_provider_data(nd_dimm);
+   struct acpi_nfit_memdev *memdev = memdev_from_spa(acpi_desc,
+   spa->spa_index, i);
+
+   if (!memdev || !nfit_mem->dcr) {
+   dev_err(dev, "%s: failed to find DCR\n", __func__);
+   return -ENODEV;
+   }
+
+   map->region_spa_offset = memdev->region_spa_offset;
+   map->serial_number = nfit_mem->dcr->serial_number;
+   }
+
+   sort(>mapping[0], num_mappings, sizeof(struct nfit_set_info_map),
+   cmp_map, NULL);
+   nd_set->cookie = nd_fletcher64(info, 
sizeof_nfit_set_info(num_mappings));
+   

[PATCH v2 12/20] libnd, nd_acpi: add interleave-set state-tracking infrastructure

2015-04-28 Thread Dan Williams
On platforms that have firmware support for reading/writing per-dimm
label space, a portion of the dimm may be accessible via an interleave
set PMEM mapping in addition to the dimm's BLK (block-data-window
aperture(s)) interface.  A label, stored in a "configuration data
region" on the dimm, disambiguates which dimm addresses are accessed
through which exclusive interface.

Add infrastructure that allows the kernel to block modifications to a
label in the set while any member dimm is active.  Note that this is
meant only for enforcing "no modifications of active labels" via the
coarse ioctl command.  Adding/deleting namespaces from an active
interleave set will only be possible via sysfs.

Another aspect of tracking interleave sets is tracking their integrity
when DIMMs in a set are physically re-ordered.  For this purpose we
generate an "interleave-set cookie" that can be recorded in a label and
validated against the current configuration.  It is the bus provider
implementation's responsibility to calculate the interleave set cookie
and attach it to a given region.

Cc: Neil Brown <ne...@suse.de>
Cc: <linux-a...@vger.kernel.org>
Cc: Greg KH <gre...@linuxfoundation.org>
Cc: Robert Moore <robert.mo...@intel.com>
Cc: Rafael J. Wysocki <rafael.j.wyso...@intel.com>
Signed-off-by: Dan Williams <dan.j.willi...@intel.com>
---
 drivers/block/nd/acpi.c|   90 
 drivers/block/nd/bus.c |   41 ++
 drivers/block/nd/core.c|   47 +
 drivers/block/nd/dimm_devs.c   |   19 
 drivers/block/nd/libnd.h   |6 +++
 drivers/block/nd/nd-private.h  |   11 -
 drivers/block/nd/nd.h  |4 ++
 drivers/block/nd/region_devs.c |   85 ++
 8 files changed, 299 insertions(+), 4 deletions(-)

diff --git a/drivers/block/nd/acpi.c b/drivers/block/nd/acpi.c
index c3dda74f73d7..d34cefe38e2f 100644
--- a/drivers/block/nd/acpi.c
+++ b/drivers/block/nd/acpi.c
@@ -15,6 +15,7 @@
 #include <linux/ndctl.h>
 #include <linux/list.h>
 #include <linux/acpi.h>
+#include <linux/sort.h>
 #include "acpi_nfit.h"
 #include "libnd.h"
 
@@ -779,6 +780,90 @@ static const struct attribute_group 
*nd_acpi_region_attribute_groups[] = {
NULL,
 };
 
+/* enough info to uniquely specify an interleave set: one entry per mapping */
+struct nfit_set_info {
+   struct nfit_set_info_map {
+   u64 region_spa_offset; /* copied from the memdev's region_spa_offset */
+   u32 serial_number; /* copied from the dimm's dcr serial_number */
+   u32 pad; /* never assigned in the visible code; NOTE(review): presumably stays zero from the kzalloc'd buffer - confirm */
+   } mapping[0]; /* trailing array; sizeof_nfit_set_info() accounts for its entries */
+};
+
+/* bytes needed for an nfit_set_info carrying num_mappings map entries */
+static size_t sizeof_nfit_set_info(int num_mappings)
+{
+	size_t entry_size = sizeof(struct nfit_set_info_map);
+
+	return sizeof(struct nfit_set_info) + num_mappings * entry_size;
+}
+
+/*
+ * sort() comparator for nfit_set_info_map entries, keyed on
+ * region_spa_offset.  memcmp() orders by raw bytes, which is not numeric
+ * order on little-endian cpus; NOTE(review): presumably sufficient because
+ * the sorted array only feeds the cookie checksum -- confirm.
+ */
+static int cmp_map(const void *m0, const void *m1)
+{
+	const struct nfit_set_info_map *map0 = m0;
+	const struct nfit_set_info_map *map1 = m1;
+
+	return memcmp(&map0->region_spa_offset, &map1->region_spa_offset,
+			sizeof(u64));
+}
+
+/*
+ * Retrieve the nth (0-based) memdev entry referencing this spa: walk
+ * acpi_desc->memdevs in list order, skipping the first n entries whose
+ * spa_index matches.  Returns NULL when fewer than n + 1 entries match.
+ */
+static struct acpi_nfit_memdev *memdev_from_spa(
+		struct acpi_nfit_desc *acpi_desc, u16 spa_index, int n)
+{
+	struct nfit_memdev *nfit_memdev;
+
+	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list)
+		if (nfit_memdev->memdev->spa_index == spa_index)
+			if (n-- == 0)
+				return nfit_memdev->memdev;
+	return NULL;
+}
+
+static int nd_acpi_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
+   struct nd_region_desc *ndr_desc, struct acpi_nfit_spa *spa)
+{
+   u16 num_mappings = ndr_desc-num_mappings;
+   int i, spa_type = nfit_spa_type(spa);
+   struct device *dev = acpi_desc-dev;
+   struct nd_interleave_set *nd_set;
+   struct nfit_set_info *info;
+
+   if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE)
+   /* pass */;
+   else
+   return 0;
+
+   nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
+   if (!nd_set)
+   return -ENOMEM;
+
+   info = devm_kzalloc(dev, sizeof_nfit_set_info(num_mappings), 
GFP_KERNEL);
+   if (!info)
+   return -ENOMEM;
+   for (i = 0; i  num_mappings; i++) {
+   struct nd_mapping *nd_mapping = ndr_desc-nd_mapping[i];
+   struct nfit_set_info_map *map = info-mapping[i];
+   struct nd_dimm *nd_dimm = nd_mapping-nd_dimm;
+   struct nfit_mem *nfit_mem = nd_dimm_provider_data(nd_dimm);
+   struct acpi_nfit_memdev *memdev = memdev_from_spa(acpi_desc,
+   spa-spa_index, i);
+
+   if (!memdev || !nfit_mem-dcr) {
+   dev_err(dev, %s: failed to find DCR\n, __func__);
+   return -ENODEV;
+   }
+
+   map-region_spa_offset = memdev-region_spa_offset;
+   map-serial_number = nfit_mem-dcr-serial_number;
+   }
+
+   sort(info-mapping[0],