On 6/3/26 10:58 AM, Rafael J. Wysocki wrote:
> From: "Rafael J. Wysocki" <[email protected]>
> 
> After commit 9b311b7313d6 ("ACPI: NFIT: Install Notify() handler before
> getting NFIT table"), ACPI NFIT driver removal may deadlock if an ACPI
> notify on the NFIT device is triggered concurrently.  A similar deadlock
> may occur if an ACPI notify on the NFIT device is triggered during a
> failing driver probe.
> 
> The deadlock is possible because acpi_dev_remove_notify_handler() calls
> acpi_os_wait_events_complete() after removing the notify handler and the
> driver core invokes it under the NFIT platform device lock which is also
> acquired by acpi_nfit_notify().  Thus acpi_os_wait_events_complete() may
> be waiting for acpi_nfit_notify() to complete, but the latter may not be
> able to acquire the device lock which is being held by the driver core
> while the former is being executed.
> 
> Moreover, after commit 03667e146f81 ("ACPI: NFIT: core: Convert the
> driver to a platform one"), there are no sysfs notifications regarding
> NVDIMM devices because __acpi_nvdimm_notify() always bails out after
> checking the driver data pointer of the device's parent.  That parent
> is the ACPI companion of the platform device used for driver binding,
> so its driver data pointer is always NULL after the commit in question
> which was overlooked by it.
> 
> A remedy for the deadlock is to use a special separate lock for ACPI
> notify synchronization with driver probe and removal instead of the
> device lock of the NFIT device, while a remedy for the second issue
> is to populate the driver data pointer of the NFIT device's ACPI
> companion when the driver is ready to operate, so do both these things.
> However, since the new lock is not held across the entire teardown and
> acpi_nfit_notify() should do nothing when teardown is in progress, make
> it check the driver data pointer of the NFIT device's ACPI companion, in
> analogy with the existing check in __acpi_nvdimm_notify(), and bail out
> if that pointer is NULL.
> 
> Fixes: 9b311b7313d6 ("ACPI: NFIT: Install Notify() handler before getting 
> NFIT table")
> Fixes: 03667e146f81 ("ACPI: NFIT: core: Convert the driver to a platform one")
> Signed-off-by: Rafael J. Wysocki <[email protected]>
> Cc: All applicable <[email protected]>

Reviewed-by: Dave Jiang <[email protected]>


> ---
>  drivers/acpi/nfit/core.c | 63 ++++++++++++++++++++++++++++++++--------
>  1 file changed, 51 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
> index aaa84ae7a20e..cb771d9cadb2 100644
> --- a/drivers/acpi/nfit/core.c
> +++ b/drivers/acpi/nfit/core.c
> @@ -56,6 +56,8 @@ MODULE_PARM_DESC(force_labels, "Opt-in to labels despite 
> missing methods");
>  LIST_HEAD(acpi_descs);
>  DEFINE_MUTEX(acpi_desc_lock);
>  
> +DEFINE_MUTEX(acpi_notify_lock);
> +
>  static struct workqueue_struct *nfit_wq;
>  
>  struct nfit_table_prev {
> @@ -1708,9 +1710,15 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32 
> event, void *data)
>       struct acpi_device *adev = data;
>       struct device *dev = &adev->dev;
>  
> -     device_lock(dev->parent);
> +     /*
> +      * Locking is needed here for synchronization with driver probe and
> +      * removal and the parent's driver data pointer is NULL when teardown
> +      * is in progress (while the parent here is expected to be the ACPI
> +      * companion of the platform device used for driver binding).
> +      */
> +     guard(mutex)(&acpi_notify_lock);
> +
>       __acpi_nvdimm_notify(dev, event);
> -     device_unlock(dev->parent);
>  }
>  
>  static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)
> @@ -3156,11 +3164,10 @@ EXPORT_SYMBOL_GPL(acpi_nfit_init);
>  static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
>  {
>       struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
> -     struct device *dev = acpi_desc->dev;
>  
> -     /* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
> -     device_lock(dev);
> -     device_unlock(dev);
> +     /* Bounce the notify lock to flush acpi_nfit_probe / acpi_nfit_notify */
> +     mutex_lock(&acpi_notify_lock);
> +     mutex_unlock(&acpi_notify_lock);
>  
>       /* Bounce the init_mutex to complete initial registration */
>       mutex_lock(&acpi_desc->init_mutex);
> @@ -3292,10 +3299,17 @@ static void acpi_nfit_put_table(void *table)
>  static void acpi_nfit_notify(acpi_handle handle, u32 event, void *data)
>  {
>       struct device *dev = data;
> +     struct acpi_device *adev = ACPI_COMPANION(dev);
>  
> -     device_lock(dev);
> -     __acpi_nfit_notify(dev, handle, event);
> -     device_unlock(dev);
> +     /*
> +      * Locking is needed here for synchronization with driver probe and
> +      * removal and the ACPI companion's driver data pointer is NULL when
> +      * teardown is in progress.
> +      */
> +     guard(mutex)(&acpi_notify_lock);
> +
> +     if (dev_get_drvdata(&adev->dev))
> +             __acpi_nfit_notify(dev, handle, event);
>  }
>  
>  void acpi_nfit_shutdown(void *data)
> @@ -3337,11 +3351,18 @@ static int acpi_nfit_probe(struct platform_device 
> *pdev)
>       struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
>       struct acpi_nfit_desc *acpi_desc;
>       struct device *dev = &pdev->dev;
> +     struct acpi_device *adev = ACPI_COMPANION(dev);
>       struct acpi_table_header *tbl;
>       acpi_status status = AE_OK;
>       acpi_size sz;
>       int rc = 0;
>  
> +     /*
> +      * Prevent acpi_nfit_notify() from progressing until the probe is
> +      * complete in case there is a concurrent event to process.
> +      */
> +     guard(mutex)(&acpi_notify_lock);
> +
>       rc = devm_acpi_install_notify_handler(dev, ACPI_DEVICE_NOTIFY,
>                                             acpi_nfit_notify, dev);
>       if (rc)
> @@ -3357,6 +3378,11 @@ static int acpi_nfit_probe(struct platform_device 
> *pdev)
>                * data in the format of a series of NFIT Structures.
>                */
>               dev_dbg(dev, "failed to find NFIT at startup\n");
> +             /*
> +              * Let acpi_nfit_update_notify() run in case it will need to
> +              * allocate the acpi_desc object.
> +              */
> +             dev_set_drvdata(&adev->dev, dev);
>               return 0;
>       }
>  
> @@ -3374,7 +3400,7 @@ static int acpi_nfit_probe(struct platform_device *pdev)
>       acpi_desc->acpi_header = *tbl;
>  
>       /* Evaluate _FIT and override with that if present */
> -     status = acpi_evaluate_object(ACPI_HANDLE(dev), "_FIT", NULL, &buf);
> +     status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
>       if (ACPI_SUCCESS(status) && buf.length > 0) {
>               union acpi_object *obj = buf.pointer;
>  
> @@ -3391,14 +3417,27 @@ static int acpi_nfit_probe(struct platform_device 
> *pdev)
>                               + sizeof(struct acpi_table_nfit),
>                               sz - sizeof(struct acpi_table_nfit));
>  
> -     if (rc)
> +     if (rc) {
>               acpi_nfit_shutdown(acpi_desc);
> +             return rc;
> +     }
>  
> -     return rc;
> +     /*
> +      * Let notify handlers operate (the actual value of the ACPI companion's
> +      * driver data pointer does not matter here so long as it is not NULL).
> +      */
> +     dev_set_drvdata(&adev->dev, dev);
> +     return 0;
>  }
>  
>  static void acpi_nfit_remove(struct platform_device *pdev)
>  {
> +     struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
> +
> +     guard(mutex)(&acpi_notify_lock);
> +
> +     /* Make notify handlers bail out early going forward. */
> +     dev_set_drvdata(&adev->dev, NULL);
>       acpi_nfit_shutdown(platform_get_drvdata(pdev));
>  }
>  


Reply via email to