On 6/3/26 10:58 AM, Rafael J. Wysocki wrote:
> From: "Rafael J. Wysocki" <[email protected]>
>
> After commit 9b311b7313d6 ("ACPI: NFIT: Install Notify() handler before
> getting NFIT table"), ACPI NFIT driver removal may deadlock if an ACPI
> notify on the NFIT device is triggered concurrently. A similar deadlock
> may occur if an ACPI notify on the NFIT device is triggered during a
> failing driver probe.
>
> The deadlock is possible because acpi_dev_remove_notify_handler() calls
> acpi_os_wait_events_complete() after removing the notify handler and the
> driver core invokes it under the NFIT platform device lock which is also
> acquired by acpi_nfit_notify(). Thus acpi_os_wait_events_complete() may
> be waiting for acpi_nfit_notify() to complete, but the latter may not be
> able to acquire the device lock which is being held by the driver core
> while the former is being executed.
>
> Moreover, after commit 03667e146f81 ("ACPI: NFIT: core: Convert the
> driver to a platform one"), there are no sysfs notifications regarding
> NVDIMM devices because __acpi_nvdimm_notify() always bails out after
> checking the driver data pointer of the device's parent. That parent
> is the ACPI companion of the platform device used for driver binding,
> so its driver data pointer is always NULL after the commit in question
> which was overlooked by it.
>
> A remedy for the deadlock is to use a special separate lock for ACPI
> notify synchronization with driver probe and removal instead of the
> device lock of the NFIT device, while a remedy for the second issue
> is to populate the driver data pointer of the NFIT device's ACPI
> companion when the driver is ready to operate, so do both these things.
> However, since the new lock is not held across the entire teardown and
> acpi_nfit_notify() should do nothing when teardown is in progress, make
> it check the driver data pointer of the NFIT device's ACPI companion, in
> analogy with the existing check in __acpi_nvdimm_notify(), and bail out
> if that pointer is NULL.
>
> Fixes: 9b311b7313d6 ("ACPI: NFIT: Install Notify() handler before getting
> NFIT table")
> Fixes: 03667e146f81 ("ACPI: NFIT: core: Convert the driver to a platform one")
> Signed-off-by: Rafael J. Wysocki <[email protected]>
> Cc: All applicable <[email protected]>
Reviewed-by: Dave Jiang <[email protected]>
> ---
> drivers/acpi/nfit/core.c | 63 ++++++++++++++++++++++++++++++++--------
> 1 file changed, 51 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
> index aaa84ae7a20e..cb771d9cadb2 100644
> --- a/drivers/acpi/nfit/core.c
> +++ b/drivers/acpi/nfit/core.c
> @@ -56,6 +56,8 @@ MODULE_PARM_DESC(force_labels, "Opt-in to labels despite
> missing methods");
> LIST_HEAD(acpi_descs);
> DEFINE_MUTEX(acpi_desc_lock);
>
> +DEFINE_MUTEX(acpi_notify_lock);
> +
> static struct workqueue_struct *nfit_wq;
>
> struct nfit_table_prev {
> @@ -1708,9 +1710,15 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32
> event, void *data)
> struct acpi_device *adev = data;
> struct device *dev = &adev->dev;
>
> - device_lock(dev->parent);
> + /*
> + * Locking is needed here for synchronization with driver probe and
> + * removal and the parent's driver data pointer is NULL when teardown
> + * is in progress (while the parent here is expected to be the ACPI
> + * companion of the platform device used for driver binding).
> + */
> + guard(mutex)(&acpi_notify_lock);
> +
> __acpi_nvdimm_notify(dev, event);
> - device_unlock(dev->parent);
> }
>
> static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)
> @@ -3156,11 +3164,10 @@ EXPORT_SYMBOL_GPL(acpi_nfit_init);
> static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
> {
> struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
> - struct device *dev = acpi_desc->dev;
>
> - /* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
> - device_lock(dev);
> - device_unlock(dev);
> + /* Bounce the notify lock to flush acpi_nfit_probe / acpi_nfit_notify */
> + mutex_lock(&acpi_notify_lock);
> + mutex_unlock(&acpi_notify_lock);
>
> /* Bounce the init_mutex to complete initial registration */
> mutex_lock(&acpi_desc->init_mutex);
> @@ -3292,10 +3299,17 @@ static void acpi_nfit_put_table(void *table)
> static void acpi_nfit_notify(acpi_handle handle, u32 event, void *data)
> {
> struct device *dev = data;
> + struct acpi_device *adev = ACPI_COMPANION(dev);
>
> - device_lock(dev);
> - __acpi_nfit_notify(dev, handle, event);
> - device_unlock(dev);
> + /*
> + * Locking is needed here for synchronization with driver probe and
> + * removal and the ACPI companion's driver data pointer is NULL when
> + * teardown is in progress.
> + */
> + guard(mutex)(&acpi_notify_lock);
> +
> + if (dev_get_drvdata(&adev->dev))
> + __acpi_nfit_notify(dev, handle, event);
> }
>
> void acpi_nfit_shutdown(void *data)
> @@ -3337,11 +3351,18 @@ static int acpi_nfit_probe(struct platform_device
> *pdev)
> struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
> struct acpi_nfit_desc *acpi_desc;
> struct device *dev = &pdev->dev;
> + struct acpi_device *adev = ACPI_COMPANION(dev);
> struct acpi_table_header *tbl;
> acpi_status status = AE_OK;
> acpi_size sz;
> int rc = 0;
>
> + /*
> + * Prevent acpi_nfit_notify() from progressing until the probe is
> + * complete in case there is a concurrent event to process.
> + */
> + guard(mutex)(&acpi_notify_lock);
> +
> rc = devm_acpi_install_notify_handler(dev, ACPI_DEVICE_NOTIFY,
> acpi_nfit_notify, dev);
> if (rc)
> @@ -3357,6 +3378,11 @@ static int acpi_nfit_probe(struct platform_device
> *pdev)
> * data in the format of a series of NFIT Structures.
> */
> dev_dbg(dev, "failed to find NFIT at startup\n");
> + /*
> + * Let acpi_nfit_update_notify() run in case it will need to
> + * allocate the acpi_desc object.
> + */
> + dev_set_drvdata(&adev->dev, dev);
> return 0;
> }
>
> @@ -3374,7 +3400,7 @@ static int acpi_nfit_probe(struct platform_device *pdev)
> acpi_desc->acpi_header = *tbl;
>
> /* Evaluate _FIT and override with that if present */
> - status = acpi_evaluate_object(ACPI_HANDLE(dev), "_FIT", NULL, &buf);
> + status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
> if (ACPI_SUCCESS(status) && buf.length > 0) {
> union acpi_object *obj = buf.pointer;
>
> @@ -3391,14 +3417,27 @@ static int acpi_nfit_probe(struct platform_device
> *pdev)
> + sizeof(struct acpi_table_nfit),
> sz - sizeof(struct acpi_table_nfit));
>
> - if (rc)
> + if (rc) {
> acpi_nfit_shutdown(acpi_desc);
> + return rc;
> + }
>
> - return rc;
> + /*
> + * Let notify handlers operate (the actual value of the ACPI companion's
> + * driver data pointer does not matter here so long as it is not NULL).
> + */
> + dev_set_drvdata(&adev->dev, dev);
> + return 0;
> }
>
> static void acpi_nfit_remove(struct platform_device *pdev)
> {
> + struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
> +
> + guard(mutex)(&acpi_notify_lock);
> +
> + /* Make notify handlers bail out early going forward. */
> + dev_set_drvdata(&adev->dev, NULL);
> acpi_nfit_shutdown(platform_get_drvdata(pdev));
> }
>