Re: [PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range

2019-08-20 Thread Oliver O'Halloran
On Tue, Aug 20, 2019 at 12:30 PM Santosh Sivaraj  wrote:
>
> Subscribe to the MCE notification and add the physical address which
> generated a memory error to nvdimm bad range.

Uh... I should have looked a bit closer at this on v1.

a) of_pmem.c isn't part of the powerpc tree. You should have CCed this
to the nvdimm list since you'll need an Ack from Dan Williams.
b) of_pmem isn't powerpc specific. Adding a pile of powerpc specific
machine check parsing means it's not going to compile on other DT
platforms.
c) all this appears to be copied and pasted from the papr_scm version of this.

Considering this is pretty similar in spirit to what's done on x86
today (drivers/acpi/nfit/mce.c) I think you would get more milage out
of moving the address matching into libnvdimm itself. Machine check
handling is always going to be arch specific, but memory errors could
be re-emitted by the arch code into a more generic notifier chain that
libnvdimm use. There's probably other uses in mm/ for such a chain
too.

Oliver


> Signed-off-by: Santosh Sivaraj 
> ---
>  drivers/nvdimm/of_pmem.c | 151 +--
>  1 file changed, 131 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
> index a0c8dcfa0bf9..155e56862fdf 100644
> --- a/drivers/nvdimm/of_pmem.c
> +++ b/drivers/nvdimm/of_pmem.c
> @@ -8,6 +8,9 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
> +#include 
>
>  static const struct attribute_group *region_attr_groups[] = {
> _region_attribute_group,
> @@ -25,11 +28,77 @@ struct of_pmem_private {
> struct nvdimm_bus *bus;
>  };
>
> +struct of_pmem_region {
> +   struct of_pmem_private *priv;
> +   struct nd_region_desc *region_desc;
> +   struct nd_region *region;
> +   struct list_head region_list;
> +};
> +
> +LIST_HEAD(pmem_regions);
> +DEFINE_MUTEX(pmem_region_lock);
> +
> +static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
> +void *data)
> +{
> +   struct machine_check_event *evt = data;
> +   struct of_pmem_region *pmem_region;
> +   u64 aligned_addr, phys_addr;
> +   bool found = false;
> +
> +   if (evt->error_type != MCE_ERROR_TYPE_UE)
> +   return NOTIFY_DONE;
> +
> +   if (list_empty(_regions))
> +   return NOTIFY_DONE;
> +
> +   phys_addr = evt->u.ue_error.physical_address +
> +   (evt->u.ue_error.effective_address & ~PAGE_MASK);
> +
> +   if (!evt->u.ue_error.physical_address_provided ||
> +   !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
> +   return NOTIFY_DONE;
> +
> +   mutex_lock(_region_lock);
> +   list_for_each_entry(pmem_region, _regions, region_list) {
> +   struct resource *res = pmem_region->region_desc->res;
> +
> +   if (phys_addr >= res->start && phys_addr <= res->end) {
> +   found = true;
> +   break;
> +   }
> +   }
> +   mutex_unlock(_region_lock);
> +
> +   if (!found)
> +   return NOTIFY_DONE;
> +
> +   aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
> +
> +   if (nvdimm_bus_add_badrange(pmem_region->priv->bus, aligned_addr,
> +   L1_CACHE_BYTES))
> +   return NOTIFY_DONE;
> +
> +   pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n",
> +aligned_addr, aligned_addr + L1_CACHE_BYTES);
> +
> +
> +   nvdimm_region_notify(pmem_region->region, NVDIMM_REVALIDATE_POISON);
> +
> +   return NOTIFY_OK;
> +}
> +
> +static struct notifier_block mce_ue_nb = {
> +   .notifier_call = handle_mce_ue
> +};
> +
>  static int of_pmem_region_probe(struct platform_device *pdev)
>  {
> struct of_pmem_private *priv;
> struct device_node *np;
> struct nvdimm_bus *bus;
> +   struct of_pmem_region *pmem_region;
> +   struct nd_region_desc *ndr_desc;
> bool is_volatile;
> int i;
>
> @@ -58,32 +127,49 @@ static int of_pmem_region_probe(struct platform_device 
> *pdev)
> is_volatile ? "volatile" : "non-volatile",  np);
>
> for (i = 0; i < pdev->num_resources; i++) {
> -   struct nd_region_desc ndr_desc;
> struct nd_region *region;
>
> -   /*
> -* NB: libnvdimm copies the data from ndr_desc into it's own
> -* structures so passing a stack pointer is fine.
> -*/
> -   memset(_desc, 0, sizeof(ndr_desc));
> -   ndr_desc.attr_groups = region_attr_groups;
> -   ndr_desc.numa_node = dev_to_node(>dev);
> -   ndr_desc.target_node = ndr_desc.numa_node;
> -   ndr_desc.res = >resource[i];
> -   ndr_desc.of_node = np;
> -   set_bit(ND_REGION_PAGEMAP, _desc.flags);
> +   ndr_desc = 

[PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range

2019-08-19 Thread Santosh Sivaraj
Subscribe to the MCE notification and add the physical address which
generated a memory error to nvdimm bad range.

Signed-off-by: Santosh Sivaraj 
---
 drivers/nvdimm/of_pmem.c | 151 +--
 1 file changed, 131 insertions(+), 20 deletions(-)

diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
index a0c8dcfa0bf9..155e56862fdf 100644
--- a/drivers/nvdimm/of_pmem.c
+++ b/drivers/nvdimm/of_pmem.c
@@ -8,6 +8,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 static const struct attribute_group *region_attr_groups[] = {
_region_attribute_group,
@@ -25,11 +28,77 @@ struct of_pmem_private {
struct nvdimm_bus *bus;
 };
 
+struct of_pmem_region {
+   struct of_pmem_private *priv;
+   struct nd_region_desc *region_desc;
+   struct nd_region *region;
+   struct list_head region_list;
+};
+
+LIST_HEAD(pmem_regions);
+DEFINE_MUTEX(pmem_region_lock);
+
+static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
+void *data)
+{
+   struct machine_check_event *evt = data;
+   struct of_pmem_region *pmem_region;
+   u64 aligned_addr, phys_addr;
+   bool found = false;
+
+   if (evt->error_type != MCE_ERROR_TYPE_UE)
+   return NOTIFY_DONE;
+
+   if (list_empty(_regions))
+   return NOTIFY_DONE;
+
+   phys_addr = evt->u.ue_error.physical_address +
+   (evt->u.ue_error.effective_address & ~PAGE_MASK);
+
+   if (!evt->u.ue_error.physical_address_provided ||
+   !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
+   return NOTIFY_DONE;
+
+   mutex_lock(_region_lock);
+   list_for_each_entry(pmem_region, _regions, region_list) {
+   struct resource *res = pmem_region->region_desc->res;
+
+   if (phys_addr >= res->start && phys_addr <= res->end) {
+   found = true;
+   break;
+   }
+   }
+   mutex_unlock(_region_lock);
+
+   if (!found)
+   return NOTIFY_DONE;
+
+   aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
+
+   if (nvdimm_bus_add_badrange(pmem_region->priv->bus, aligned_addr,
+   L1_CACHE_BYTES))
+   return NOTIFY_DONE;
+
+   pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n",
+aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+
+   nvdimm_region_notify(pmem_region->region, NVDIMM_REVALIDATE_POISON);
+
+   return NOTIFY_OK;
+}
+
+static struct notifier_block mce_ue_nb = {
+   .notifier_call = handle_mce_ue
+};
+
 static int of_pmem_region_probe(struct platform_device *pdev)
 {
struct of_pmem_private *priv;
struct device_node *np;
struct nvdimm_bus *bus;
+   struct of_pmem_region *pmem_region;
+   struct nd_region_desc *ndr_desc;
bool is_volatile;
int i;
 
@@ -58,32 +127,49 @@ static int of_pmem_region_probe(struct platform_device 
*pdev)
is_volatile ? "volatile" : "non-volatile",  np);
 
for (i = 0; i < pdev->num_resources; i++) {
-   struct nd_region_desc ndr_desc;
struct nd_region *region;
 
-   /*
-* NB: libnvdimm copies the data from ndr_desc into it's own
-* structures so passing a stack pointer is fine.
-*/
-   memset(_desc, 0, sizeof(ndr_desc));
-   ndr_desc.attr_groups = region_attr_groups;
-   ndr_desc.numa_node = dev_to_node(>dev);
-   ndr_desc.target_node = ndr_desc.numa_node;
-   ndr_desc.res = >resource[i];
-   ndr_desc.of_node = np;
-   set_bit(ND_REGION_PAGEMAP, _desc.flags);
+   ndr_desc = kzalloc(sizeof(struct nd_region_desc), GFP_KERNEL);
+   if (!ndr_desc) {
+   nvdimm_bus_unregister(priv->bus);
+   kfree(priv);
+   return -ENOMEM;
+   }
+
+   ndr_desc->attr_groups = region_attr_groups;
+   ndr_desc->numa_node = dev_to_node(>dev);
+   ndr_desc->target_node = ndr_desc->numa_node;
+   ndr_desc->res = >resource[i];
+   ndr_desc->of_node = np;
+   set_bit(ND_REGION_PAGEMAP, _desc->flags);
 
if (is_volatile)
-   region = nvdimm_volatile_region_create(bus, _desc);
+   region = nvdimm_volatile_region_create(bus, ndr_desc);
else
-   region = nvdimm_pmem_region_create(bus, _desc);
+   region = nvdimm_pmem_region_create(bus, ndr_desc);
 
-   if (!region)
+   if (!region) {
dev_warn(>dev, "Unable to register region %pR 
from %pOF\n",
-   ndr_desc.res, np);
-   

[PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range

2019-08-14 Thread Santosh Sivaraj
Subscribe to the MCE notification and add the physical address which
generated a memory error to nvdimm bad range.

Signed-off-by: Santosh Sivaraj 
---
 drivers/nvdimm/of_pmem.c | 122 +--
 1 file changed, 103 insertions(+), 19 deletions(-)

diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
index a0c8dcfa0bf9..828dbfe44ca6 100644
--- a/drivers/nvdimm/of_pmem.c
+++ b/drivers/nvdimm/of_pmem.c
@@ -8,6 +8,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 static const struct attribute_group *region_attr_groups[] = {
_region_attribute_group,
@@ -25,11 +28,77 @@ struct of_pmem_private {
struct nvdimm_bus *bus;
 };
 
+struct of_pmem_region {
+   struct of_pmem_private *priv;
+   struct nd_region_desc *region_desc;
+   struct nd_region *region;
+   struct list_head list;
+};
+
+LIST_HEAD(pmem_regions);
+DEFINE_MUTEX(pmem_region_lock);
+
+static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
+void *data)
+{
+   struct machine_check_event *evt = data;
+   struct of_pmem_region *pmem_region;
+   u64 phys_addr;
+
+   if (evt->error_type != MCE_ERROR_TYPE_UE)
+   return NOTIFY_DONE;
+
+   if (list_empty(_regions))
+   return NOTIFY_DONE;
+
+   phys_addr = evt->u.ue_error.physical_address +
+   (evt->u.ue_error.effective_address & ~PAGE_MASK);
+
+   if (!evt->u.ue_error.physical_address_provided ||
+   !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
+   return NOTIFY_DONE;
+
+   mutex_lock(_region_lock);
+   list_for_each_entry(pmem_region, _regions, list) {
+   struct resource *res = pmem_region->region_desc->res;
+   u64 aligned_addr;
+
+   if (res->start > phys_addr)
+   continue;
+
+   if (res->end < phys_addr)
+   continue;
+
+   aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
+   pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n",
+aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+   if (nvdimm_bus_add_badrange(pmem_region->priv->bus,
+aligned_addr, L1_CACHE_BYTES))
+   pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n",
+   aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+   nvdimm_region_notify(pmem_region->region,
+NVDIMM_REVALIDATE_POISON);
+
+   break;
+   }
+   mutex_unlock(_region_lock);
+
+   return NOTIFY_OK;
+}
+
+static struct notifier_block mce_ue_nb = {
+   .notifier_call = handle_mce_ue
+};
+
 static int of_pmem_region_probe(struct platform_device *pdev)
 {
struct of_pmem_private *priv;
struct device_node *np;
struct nvdimm_bus *bus;
+   struct of_pmem_region *pmem_region;
+   struct nd_region_desc *ndr_desc;
bool is_volatile;
int i;
 
@@ -58,34 +127,49 @@ static int of_pmem_region_probe(struct platform_device 
*pdev)
is_volatile ? "volatile" : "non-volatile",  np);
 
for (i = 0; i < pdev->num_resources; i++) {
-   struct nd_region_desc ndr_desc;
struct nd_region *region;
 
-   /*
-* NB: libnvdimm copies the data from ndr_desc into it's own
-* structures so passing a stack pointer is fine.
-*/
-   memset(_desc, 0, sizeof(ndr_desc));
-   ndr_desc.attr_groups = region_attr_groups;
-   ndr_desc.numa_node = dev_to_node(>dev);
-   ndr_desc.target_node = ndr_desc.numa_node;
-   ndr_desc.res = >resource[i];
-   ndr_desc.of_node = np;
-   set_bit(ND_REGION_PAGEMAP, _desc.flags);
+   ndr_desc = kzalloc(sizeof(struct nd_region_desc), GFP_KERNEL);
+   if (!ndr_desc) {
+   nvdimm_bus_unregister(priv->bus);
+   kfree(priv);
+   return -ENOMEM;
+   }
+
+   ndr_desc->attr_groups = region_attr_groups;
+   ndr_desc->numa_node = dev_to_node(>dev);
+   ndr_desc->target_node = ndr_desc->numa_node;
+   ndr_desc->res = >resource[i];
+   ndr_desc->of_node = np;
+   set_bit(ND_REGION_PAGEMAP, _desc->flags);
 
if (is_volatile)
-   region = nvdimm_volatile_region_create(bus, _desc);
+   region = nvdimm_volatile_region_create(bus, ndr_desc);
else
-   region = nvdimm_pmem_region_create(bus, _desc);
+   region = nvdimm_pmem_region_create(bus, ndr_desc);
 
if (!region)
-   dev_warn(>dev, "Unable