From: "Rafael J. Wysocki" <[email protected]>

After commit 9b311b7313d6 ("ACPI: NFIT: Install Notify() handler before
getting NFIT table"), ACPI NFIT driver removal may deadlock if an ACPI
notify on the NFIT device is triggered concurrently.  A similar deadlock
may occur if an ACPI notify on the NFIT device is triggered during a
failing driver probe.

The deadlock is possible because acpi_dev_remove_notify_handler() calls
acpi_os_wait_events_complete() after removing the notify handler and the
driver core invokes it under the NFIT platform device lock which is also
acquired by acpi_nfit_notify().  Thus acpi_os_wait_events_complete() may
be waiting for acpi_nfit_notify() to complete, but the latter may not be
able to acquire the device lock which is being held by the driver core
while the former is being executed.

Moreover, after commit 03667e146f81 ("ACPI: NFIT: core: Convert the
driver to a platform one"), there are no sysfs notifications regarding
NVDIMM devices because __acpi_nvdimm_notify() always bails out after
checking the driver data pointer of the device's parent.  That parent
is the ACPI companion of the platform device used for driver binding,
so its driver data pointer is always NULL after the commit in question
which was overlooked by it.

A remedy for the deadlock is to use a special separate lock for ACPI
notify synchronization with driver probe and removal instead of the
device lock of the NFIT device, while a remedy for the second issue
is to populate the driver data pointer of the NFIT device's ACPI
companion when the driver is ready to operate, so do both these things.
However, since the new lock is not held across the entire teardown and
acpi_nfit_notify() should do nothing when teardown is in progress, make
it check the driver data pointer of the NFIT device's ACPI companion, in
analogy with the existing check in __acpi_nvdimm_notify(), and bail out
if that pointer is NULL.

Fixes: 9b311b7313d6 ("ACPI: NFIT: Install Notify() handler before getting NFIT 
table")
Fixes: 03667e146f81 ("ACPI: NFIT: core: Convert the driver to a platform one")
Signed-off-by: Rafael J. Wysocki <[email protected]>
Cc: All applicable <[email protected]>
---
 drivers/acpi/nfit/core.c | 63 ++++++++++++++++++++++++++++++++--------
 1 file changed, 51 insertions(+), 12 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index aaa84ae7a20e..cb771d9cadb2 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -56,6 +56,8 @@ MODULE_PARM_DESC(force_labels, "Opt-in to labels despite 
missing methods");
 LIST_HEAD(acpi_descs);
 DEFINE_MUTEX(acpi_desc_lock);
 
+DEFINE_MUTEX(acpi_notify_lock);
+
 static struct workqueue_struct *nfit_wq;
 
 struct nfit_table_prev {
@@ -1708,9 +1710,15 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32 
event, void *data)
        struct acpi_device *adev = data;
        struct device *dev = &adev->dev;
 
-       device_lock(dev->parent);
+       /*
+        * Locking is needed here for synchronization with driver probe and
+        * removal and the parent's driver data pointer is NULL when teardown
+        * is in progress (while the parent here is expected to be the ACPI
+        * companion of the platform device used for driver binding).
+        */
+       guard(mutex)(&acpi_notify_lock);
+
        __acpi_nvdimm_notify(dev, event);
-       device_unlock(dev->parent);
 }
 
 static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)
@@ -3156,11 +3164,10 @@ EXPORT_SYMBOL_GPL(acpi_nfit_init);
 static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
 {
        struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
-       struct device *dev = acpi_desc->dev;
 
-       /* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
-       device_lock(dev);
-       device_unlock(dev);
+       /* Bounce the notify lock to flush acpi_nfit_probe / acpi_nfit_notify */
+       mutex_lock(&acpi_notify_lock);
+       mutex_unlock(&acpi_notify_lock);
 
        /* Bounce the init_mutex to complete initial registration */
        mutex_lock(&acpi_desc->init_mutex);
@@ -3292,10 +3299,17 @@ static void acpi_nfit_put_table(void *table)
 static void acpi_nfit_notify(acpi_handle handle, u32 event, void *data)
 {
        struct device *dev = data;
+       struct acpi_device *adev = ACPI_COMPANION(dev);
 
-       device_lock(dev);
-       __acpi_nfit_notify(dev, handle, event);
-       device_unlock(dev);
+       /*
+        * Locking is needed here for synchronization with driver probe and
+        * removal and the ACPI companion's driver data pointer is NULL when
+        * teardown is in progress.
+        */
+       guard(mutex)(&acpi_notify_lock);
+
+       if (dev_get_drvdata(&adev->dev))
+               __acpi_nfit_notify(dev, handle, event);
 }
 
 void acpi_nfit_shutdown(void *data)
@@ -3337,11 +3351,18 @@ static int acpi_nfit_probe(struct platform_device *pdev)
        struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
        struct acpi_nfit_desc *acpi_desc;
        struct device *dev = &pdev->dev;
+       struct acpi_device *adev = ACPI_COMPANION(dev);
        struct acpi_table_header *tbl;
        acpi_status status = AE_OK;
        acpi_size sz;
        int rc = 0;
 
+       /*
+        * Prevent acpi_nfit_notify() from progressing until the probe is
+        * complete in case there is a concurrent event to process.
+        */
+       guard(mutex)(&acpi_notify_lock);
+
        rc = devm_acpi_install_notify_handler(dev, ACPI_DEVICE_NOTIFY,
                                              acpi_nfit_notify, dev);
        if (rc)
@@ -3357,6 +3378,11 @@ static int acpi_nfit_probe(struct platform_device *pdev)
                 * data in the format of a series of NFIT Structures.
                 */
                dev_dbg(dev, "failed to find NFIT at startup\n");
+               /*
+                * Let acpi_nfit_update_notify() run in case it will need to
+                * allocate the acpi_desc object.
+                */
+               dev_set_drvdata(&adev->dev, dev);
                return 0;
        }
 
@@ -3374,7 +3400,7 @@ static int acpi_nfit_probe(struct platform_device *pdev)
        acpi_desc->acpi_header = *tbl;
 
        /* Evaluate _FIT and override with that if present */
-       status = acpi_evaluate_object(ACPI_HANDLE(dev), "_FIT", NULL, &buf);
+       status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
        if (ACPI_SUCCESS(status) && buf.length > 0) {
                union acpi_object *obj = buf.pointer;
 
@@ -3391,14 +3417,27 @@ static int acpi_nfit_probe(struct platform_device *pdev)
                                + sizeof(struct acpi_table_nfit),
                                sz - sizeof(struct acpi_table_nfit));
 
-       if (rc)
+       if (rc) {
                acpi_nfit_shutdown(acpi_desc);
+               return rc;
+       }
 
-       return rc;
+       /*
+        * Let notify handlers operate (the actual value of the ACPI companion's
+        * driver data pointer does not matter here so long as it is not NULL).
+        */
+       dev_set_drvdata(&adev->dev, dev);
+       return 0;
 }
 
 static void acpi_nfit_remove(struct platform_device *pdev)
 {
+       struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+
+       guard(mutex)(&acpi_notify_lock);
+
+       /* Make notify handlers bail out early going forward. */
+       dev_set_drvdata(&adev->dev, NULL);
        acpi_nfit_shutdown(platform_get_drvdata(pdev));
 }
 
-- 
2.51.0





Reply via email to