On 8/19/2024 11:09 PM, Gokul Sriram Palanisamy wrote:
> From: Vignesh Viswanathan <[email protected]>
> 
> Currently the SSR subdevice notifies the client driver on crash of the
> rproc from the recovery workqueue using the BEFORE_SHUTDOWN event.
> However, the client driver might be interested to know that the device
> has crashed immediately to pause any further transactions with the
> rproc. This calls for an event to be sent to the driver in the IRQ
> context as soon as the rproc crashes.
> 
> Add NOTIFY_FATAL event to SSR subdevice to atomically notify rproc has
> crashed to the client driver. The client driver in this scenario is a
> ath Wi-Fi driver which is continuously queuing data to the remoteproc and
> needs to know if remoteproc crashes as soon as possible to stop queuing
> further data and also dump some debug statistics on the driver side that
> could potentially help in debug of why the remoteproc crashed.
> 
> Validated the event in IPQ9574 and IPQ5332 by forcing the rproc to crash
> and ensuring the registered notifier function receives the notification
> in IRQ context.
> 
> The client driver in this scenario is a Wi-Fi driver which is continuously
> queuing data to the remoteproc and needs to know if remoteproc crashes
> as soon as possible to stop queuing further data and also dump some 
> debug statistics on the driver side that could potentially help in
> debug of why the remoteproc crashed.
> 
> Signed-off-by: Vignesh Viswanathan <[email protected]>
> Acked-by: Mukesh Ojha <[email protected]>
> Signed-off-by: Gokul Sriram Palanisamy <[email protected]>
> ---
> changes since v1:
>       - No changes. Rebased on top of linux-next.
>       - We will now have a user for this notifier, ath12k.
> 
>  drivers/remoteproc/qcom_common.c      | 60 +++++++++++++++++++++++++++
>  drivers/remoteproc/remoteproc_core.c  | 12 ++++++
>  include/linux/remoteproc.h            |  3 ++
>  include/linux/remoteproc/qcom_rproc.h | 17 ++++++++
>  4 files changed, 92 insertions(+)

Core review comments below assisted by Claude:claude-sonnet-4-6
> 
> diff --git a/drivers/remoteproc/qcom_common.c 
> b/drivers/remoteproc/qcom_common.c
> index 8c8688f99f0a..6efee422cc23 100644
> --- a/drivers/remoteproc/qcom_common.c
> +++ b/drivers/remoteproc/qcom_common.c
> @@ -86,6 +86,7 @@ struct minidump_global_toc {
>  struct qcom_ssr_subsystem {
>       const char *name;
>       struct srcu_notifier_head notifier_list;
> +     struct atomic_notifier_head atomic_notifier_list;
>       struct list_head list;
>  };
>  
> @@ -377,6 +378,7 @@ static struct qcom_ssr_subsystem 
> *qcom_ssr_get_subsys(const char *name)
>       }
>       info->name = kstrdup_const(name, GFP_KERNEL);
>       srcu_init_notifier_head(&info->notifier_list);
> +     ATOMIC_INIT_NOTIFIER_HEAD(&info->atomic_notifier_list);
>  
>       /* Add to global notification list */
>       list_add_tail(&info->list, &qcom_ssr_subsystem_list);
> @@ -428,6 +430,51 @@ int qcom_unregister_ssr_notifier(void *notify, struct 
> notifier_block *nb)
>  }
>  EXPORT_SYMBOL_GPL(qcom_unregister_ssr_notifier);
>  
> +/**
> + * qcom_register_ssr_atomic_notifier() - register SSR Atomic notification
> + *                                    handler
> + * @name:    Subsystem's SSR name
> + * @nb:      notifier_block to be invoked upon subsystem's state change
> + *
> + * This registers the @nb notifier block as part the atomic notifier
> + * chain for a remoteproc associated with @name. The notifier block's 
> callback
> + * will be invoked when the remote processor crashes in atomic context before
> + * the recovery process is queued.
> + *
> + * Return: a subsystem cookie on success, ERR_PTR on failure.
> + */
> +void *qcom_register_ssr_atomic_notifier(const char *name,
> +                                     struct notifier_block *nb)
> +{
> +     struct qcom_ssr_subsystem *info;
> +
> +     info = qcom_ssr_get_subsys(name);
> +     if (IS_ERR(info))
> +             return info;
> +
> +     atomic_notifier_chain_register(&info->atomic_notifier_list, nb);
> +
> +     return &info->atomic_notifier_list;
> +}
> +EXPORT_SYMBOL_GPL(qcom_register_ssr_atomic_notifier);
> +
> +/**
> + * qcom_unregister_ssr_atomic_notifier() - unregister SSR Atomic notification
> + *                                      handler
> + * @notify:  subsystem cookie returned from qcom_register_ssr_notifier

@notify kdoc parameter references wrong function name

Should be qcom_register_ssr_atomic_notifier. The two notifier chains are
independent; notify from qcom_register_ssr_notifier points to
&info->notifier_list (SRCU), while notify from
qcom_register_ssr_atomic_notifier points to &info->atomic_notifier_list.
Passing the wrong cookie to atomic_notifier_chain_unregister would corrupt the
wrong list. The doc misleads callers about which cookie to pass.

> + * @nb:              notifier_block to unregister
> + *
> + * This function will unregister the notifier from the atomic notifier
> + * chain.
> + *
> + * Return: 0 on success, %ENOENT otherwise.
> + */
> +int qcom_unregister_ssr_atomic_notifier(void *notify, struct notifier_block 
> *nb)
> +{
> +     return atomic_notifier_chain_unregister(notify, nb);
> +}
> +EXPORT_SYMBOL_GPL(qcom_unregister_ssr_atomic_notifier);
> +
>  static int ssr_notify_prepare(struct rproc_subdev *subdev)
>  {
>       struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
> @@ -478,6 +525,18 @@ static void ssr_notify_unprepare(struct rproc_subdev 
> *subdev)
>                                QCOM_SSR_AFTER_SHUTDOWN, &data);
>  }
>  
> +static void ssr_notify_crash(struct rproc_subdev *subdev)
> +{
> +     struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
> +     struct qcom_ssr_notify_data data = {
> +             .name = ssr->info->name,
> +             .crashed = true,
> +     };
> +
> +     atomic_notifier_call_chain(&ssr->info->atomic_notifier_list,
> +                                QCOM_SSR_NOTIFY_CRASH, &data);
> +}
> +
>  /**
>   * qcom_add_ssr_subdev() - register subdevice as restart notification source
>   * @rproc:   rproc handle
> @@ -504,6 +563,7 @@ void qcom_add_ssr_subdev(struct rproc *rproc, struct 
> qcom_rproc_ssr *ssr,
>       ssr->subdev.start = ssr_notify_start;
>       ssr->subdev.stop = ssr_notify_stop;
>       ssr->subdev.unprepare = ssr_notify_unprepare;
> +     ssr->subdev.notify_crash = ssr_notify_crash;
>  
>       rproc_add_subdev(rproc, &ssr->subdev);
>  }
> diff --git a/drivers/remoteproc/remoteproc_core.c 
> b/drivers/remoteproc/remoteproc_core.c
> index f276956f2c5c..1828831e0e77 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -1140,6 +1140,16 @@ static void rproc_unprepare_subdevices(struct rproc 
> *rproc)
>       }
>  }
>  
> +static void rproc_notify_crash_subdevices(struct rproc *rproc)
> +{
> +     struct rproc_subdev *subdev;
> +
> +     list_for_each_entry_reverse(subdev, &rproc->subdevs, node) {
> +             if (subdev->notify_crash)
> +                     subdev->notify_crash(subdev);
> +     }
> +}
> +
>  /**
>   * rproc_alloc_registered_carveouts() - allocate all carveouts registered
>   * in the list
> @@ -2712,6 +2722,8 @@ void rproc_report_crash(struct rproc *rproc, enum 
> rproc_crash_type type)
>       dev_err(&rproc->dev, "crash detected in %s: type %s\n",
>               rproc->name, rproc_crash_to_string(type));
>  
> +     rproc_notify_crash_subdevices(rproc);
> +
>       queue_work(rproc_recovery_wq, &rproc->crash_handler);
>  }
>  EXPORT_SYMBOL(rproc_report_crash);
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index b4795698d8c2..5101a7ec01bb 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -596,6 +596,8 @@ struct rproc {
>   * @stop: stop function, called before the rproc is stopped; the @crashed
>   *       parameter indicates if this originates from a recovery
>   * @unprepare: unprepare function, called after the rproc has been stopped
> + * @notify_crash: notify_crash function, called in atomic context to notify
> + *             rproc has crashed and recovery is about to start
>   */
>  struct rproc_subdev {
>       struct list_head node;
> @@ -604,6 +606,7 @@ struct rproc_subdev {
>       int (*start)(struct rproc_subdev *subdev);
>       void (*stop)(struct rproc_subdev *subdev, bool crashed);
>       void (*unprepare)(struct rproc_subdev *subdev);
> +     void (*notify_crash)(struct rproc_subdev *subdev);
>  };
>  
>  /* we currently support only two vrings per rvdev */
> diff --git a/include/linux/remoteproc/qcom_rproc.h 
> b/include/linux/remoteproc/qcom_rproc.h
> index 82b211518136..f3d06900f297 100644
> --- a/include/linux/remoteproc/qcom_rproc.h
> +++ b/include/linux/remoteproc/qcom_rproc.h
> @@ -11,12 +11,14 @@ struct notifier_block;
>   * @QCOM_SSR_AFTER_POWERUP:  Remoteproc is running (start stage)
>   * @QCOM_SSR_BEFORE_SHUTDOWN:        Remoteproc crashed or shutting down 
> (stop stage)
>   * @QCOM_SSR_AFTER_SHUTDOWN: Remoteproc is down (unprepare stage)
> + * @QCOM_SSR_NOTIFY_CRASH:   Remoteproc crashed
>   */
>  enum qcom_ssr_notify_type {
>       QCOM_SSR_BEFORE_POWERUP,
>       QCOM_SSR_AFTER_POWERUP,
>       QCOM_SSR_BEFORE_SHUTDOWN,
>       QCOM_SSR_AFTER_SHUTDOWN,
> +     QCOM_SSR_NOTIFY_CRASH,
>  };
>  
>  struct qcom_ssr_notify_data {
> @@ -29,6 +31,10 @@ struct qcom_ssr_notify_data {
>  void *qcom_register_ssr_notifier(const char *name, struct notifier_block 
> *nb);
>  int qcom_unregister_ssr_notifier(void *notify, struct notifier_block *nb);
>  
> +void *qcom_register_ssr_atomic_notifier(const char *name,
> +                                     struct notifier_block *nb);
> +int qcom_unregister_ssr_atomic_notifier(void *notify,
> +                                     struct notifier_block *nb);
>  #else
>  
>  static inline void *qcom_register_ssr_notifier(const char *name,
> @@ -43,6 +49,17 @@ static inline int qcom_unregister_ssr_notifier(void 
> *notify,
>       return 0;
>  }
>  
> +static inline void *qcom_register_ssr_atomic_notifier(const char *name,
> +                                                   struct notifier_block *nb)
> +{
> +     return 0;

qcom_register_ssr_atomic_notifier disabled stub returns integer 0 instead of 
NULL

The function return type is void *. Returning integer 0 rather than NULL will
trigger a -Wint-conversion compiler warning in kernel builds (return makes
pointer from integer without a cast). The immediately preceding sibling stub
qcom_register_ssr_notifier correctly returns NULL.

Fix: return NULL;


> +}
> +
> +static inline int qcom_unregister_ssr_atomic_notifier(void *notify,
> +                                                   struct notifier_block *nb)
> +{
> +     return 0;
> +}
>  #endif
>  
>  #endif


Reply via email to