On 8/19/2024 11:09 PM, Gokul Sriram Palanisamy wrote:
> From: Vignesh Viswanathan <[email protected]>
>
> Currently the SSR subdevice notifies the client driver on crash of the
> rproc from the recovery workqueue using the BEFORE_SHUTDOWN event.
> However, the client driver might be interested to know that the device
> has crashed immediately to pause any further transactions with the
> rproc. This calls for an event to be sent to the driver in the IRQ
> context as soon as the rproc crashes.
>
> Add NOTIFY_FATAL event to SSR subdevice to atomically notify rproc has
> crashed to the client driver. The client driver in this scenario is a
> ath Wi-Fi driver which is continuously queuing data to the remoteproc and
> needs to know if remoteproc crashes as soon as possible to stop queuing
> further data and also dump some debug statistics on the driver side that
> could potentially help in debug of why the remoteproc crashed.
>
> Validated the event in IPQ9574 and IPQ5332 by forcing the rproc to crash
> and ensuring the registered notifier function receives the notification
> in IRQ context.
>
> The client driver in this scenario is a Wi-Fi driver which is continuously
> queuing data to the remoteproc and needs to know if remoteproc crashes
> as soon as possible to stop queuing further data and also dump some
> debug statistics on the driver side that could potentially help in
> debug of why the remoteproc crashed.
>
> Signed-off-by: Vignesh Viswanathan <[email protected]>
> Acked-by: Mukesh Ojha <[email protected]>
> Signed-off-by: Gokul Sriram Palanisamy <[email protected]>
> ---
> changes since v1:
> - No changes. Rebased on top of linux-next.
> - We will now have a user for this notifier, ath12k.
>
> drivers/remoteproc/qcom_common.c | 60 +++++++++++++++++++++++++++
> drivers/remoteproc/remoteproc_core.c | 12 ++++++
> include/linux/remoteproc.h | 3 ++
> include/linux/remoteproc/qcom_rproc.h | 17 ++++++++
> 4 files changed, 92 insertions(+)
Core review comments below assisted by Claude:claude-sonnet-4-6
>
> diff --git a/drivers/remoteproc/qcom_common.c
> b/drivers/remoteproc/qcom_common.c
> index 8c8688f99f0a..6efee422cc23 100644
> --- a/drivers/remoteproc/qcom_common.c
> +++ b/drivers/remoteproc/qcom_common.c
> @@ -86,6 +86,7 @@ struct minidump_global_toc {
> struct qcom_ssr_subsystem {
> const char *name;
> struct srcu_notifier_head notifier_list;
> + struct atomic_notifier_head atomic_notifier_list;
> struct list_head list;
> };
>
> @@ -377,6 +378,7 @@ static struct qcom_ssr_subsystem
> *qcom_ssr_get_subsys(const char *name)
> }
> info->name = kstrdup_const(name, GFP_KERNEL);
> srcu_init_notifier_head(&info->notifier_list);
> + ATOMIC_INIT_NOTIFIER_HEAD(&info->atomic_notifier_list);
>
> /* Add to global notification list */
> list_add_tail(&info->list, &qcom_ssr_subsystem_list);
> @@ -428,6 +430,51 @@ int qcom_unregister_ssr_notifier(void *notify, struct
> notifier_block *nb)
> }
> EXPORT_SYMBOL_GPL(qcom_unregister_ssr_notifier);
>
> +/**
> + * qcom_register_ssr_atomic_notifier() - register SSR Atomic notification
> + * handler
> + * @name: Subsystem's SSR name
> + * @nb: notifier_block to be invoked upon subsystem's state change
> + *
> + * This registers the @nb notifier block as part the atomic notifier
> + * chain for a remoteproc associated with @name. The notifier block's
> callback
> + * will be invoked when the remote processor crashes in atomic context before
> + * the recovery process is queued.
> + *
> + * Return: a subsystem cookie on success, ERR_PTR on failure.
> + */
> +void *qcom_register_ssr_atomic_notifier(const char *name,
> + struct notifier_block *nb)
> +{
> + struct qcom_ssr_subsystem *info;
> +
> + info = qcom_ssr_get_subsys(name);
> + if (IS_ERR(info))
> + return info;
> +
> + atomic_notifier_chain_register(&info->atomic_notifier_list, nb);
> +
> + return &info->atomic_notifier_list;
> +}
> +EXPORT_SYMBOL_GPL(qcom_register_ssr_atomic_notifier);
> +
> +/**
> + * qcom_unregister_ssr_atomic_notifier() - unregister SSR Atomic notification
> + * handler
> + * @notify: subsystem cookie returned from qcom_register_ssr_notifier
@notify kdoc parameter references wrong function name
Should be qcom_register_ssr_atomic_notifier. The two notifier chains are
independent; notify from qcom_register_ssr_notifier points to
&info->notifier_list (SRCU), while notify from
qcom_register_ssr_atomic_notifier points to &info->atomic_notifier_list.
Passing the wrong cookie to atomic_notifier_chain_unregister would corrupt the
wrong list. The doc misleads callers about which cookie to pass.
> + * @nb: notifier_block to unregister
> + *
> + * This function will unregister the notifier from the atomic notifier
> + * chain.
> + *
> + * Return: 0 on success, %ENOENT otherwise.
> + */
> +int qcom_unregister_ssr_atomic_notifier(void *notify, struct notifier_block
> *nb)
> +{
> + return atomic_notifier_chain_unregister(notify, nb);
> +}
> +EXPORT_SYMBOL_GPL(qcom_unregister_ssr_atomic_notifier);
> +
> static int ssr_notify_prepare(struct rproc_subdev *subdev)
> {
> struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
> @@ -478,6 +525,18 @@ static void ssr_notify_unprepare(struct rproc_subdev
> *subdev)
> QCOM_SSR_AFTER_SHUTDOWN, &data);
> }
>
> +static void ssr_notify_crash(struct rproc_subdev *subdev)
> +{
> + struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
> + struct qcom_ssr_notify_data data = {
> + .name = ssr->info->name,
> + .crashed = true,
> + };
> +
> + atomic_notifier_call_chain(&ssr->info->atomic_notifier_list,
> + QCOM_SSR_NOTIFY_CRASH, &data);
> +}
> +
> /**
> * qcom_add_ssr_subdev() - register subdevice as restart notification source
> * @rproc: rproc handle
> @@ -504,6 +563,7 @@ void qcom_add_ssr_subdev(struct rproc *rproc, struct
> qcom_rproc_ssr *ssr,
> ssr->subdev.start = ssr_notify_start;
> ssr->subdev.stop = ssr_notify_stop;
> ssr->subdev.unprepare = ssr_notify_unprepare;
> + ssr->subdev.notify_crash = ssr_notify_crash;
>
> rproc_add_subdev(rproc, &ssr->subdev);
> }
> diff --git a/drivers/remoteproc/remoteproc_core.c
> b/drivers/remoteproc/remoteproc_core.c
> index f276956f2c5c..1828831e0e77 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -1140,6 +1140,16 @@ static void rproc_unprepare_subdevices(struct rproc
> *rproc)
> }
> }
>
> +static void rproc_notify_crash_subdevices(struct rproc *rproc)
> +{
> + struct rproc_subdev *subdev;
> +
> + list_for_each_entry_reverse(subdev, &rproc->subdevs, node) {
> + if (subdev->notify_crash)
> + subdev->notify_crash(subdev);
> + }
> +}
> +
> /**
> * rproc_alloc_registered_carveouts() - allocate all carveouts registered
> * in the list
> @@ -2712,6 +2722,8 @@ void rproc_report_crash(struct rproc *rproc, enum
> rproc_crash_type type)
> dev_err(&rproc->dev, "crash detected in %s: type %s\n",
> rproc->name, rproc_crash_to_string(type));
>
> + rproc_notify_crash_subdevices(rproc);
> +
> queue_work(rproc_recovery_wq, &rproc->crash_handler);
> }
> EXPORT_SYMBOL(rproc_report_crash);
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index b4795698d8c2..5101a7ec01bb 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -596,6 +596,8 @@ struct rproc {
> * @stop: stop function, called before the rproc is stopped; the @crashed
> * parameter indicates if this originates from a recovery
> * @unprepare: unprepare function, called after the rproc has been stopped
> + * @notify_crash: notify_crash function, called in atomic context to notify
> + * rproc has crashed and recovery is about to start
> */
> struct rproc_subdev {
> struct list_head node;
> @@ -604,6 +606,7 @@ struct rproc_subdev {
> int (*start)(struct rproc_subdev *subdev);
> void (*stop)(struct rproc_subdev *subdev, bool crashed);
> void (*unprepare)(struct rproc_subdev *subdev);
> + void (*notify_crash)(struct rproc_subdev *subdev);
> };
>
> /* we currently support only two vrings per rvdev */
> diff --git a/include/linux/remoteproc/qcom_rproc.h
> b/include/linux/remoteproc/qcom_rproc.h
> index 82b211518136..f3d06900f297 100644
> --- a/include/linux/remoteproc/qcom_rproc.h
> +++ b/include/linux/remoteproc/qcom_rproc.h
> @@ -11,12 +11,14 @@ struct notifier_block;
> * @QCOM_SSR_AFTER_POWERUP: Remoteproc is running (start stage)
> * @QCOM_SSR_BEFORE_SHUTDOWN: Remoteproc crashed or shutting down
> (stop stage)
> * @QCOM_SSR_AFTER_SHUTDOWN: Remoteproc is down (unprepare stage)
> + * @QCOM_SSR_NOTIFY_CRASH: Remoteproc crashed
> */
> enum qcom_ssr_notify_type {
> QCOM_SSR_BEFORE_POWERUP,
> QCOM_SSR_AFTER_POWERUP,
> QCOM_SSR_BEFORE_SHUTDOWN,
> QCOM_SSR_AFTER_SHUTDOWN,
> + QCOM_SSR_NOTIFY_CRASH,
> };
>
> struct qcom_ssr_notify_data {
> @@ -29,6 +31,10 @@ struct qcom_ssr_notify_data {
> void *qcom_register_ssr_notifier(const char *name, struct notifier_block
> *nb);
> int qcom_unregister_ssr_notifier(void *notify, struct notifier_block *nb);
>
> +void *qcom_register_ssr_atomic_notifier(const char *name,
> + struct notifier_block *nb);
> +int qcom_unregister_ssr_atomic_notifier(void *notify,
> + struct notifier_block *nb);
> #else
>
> static inline void *qcom_register_ssr_notifier(const char *name,
> @@ -43,6 +49,17 @@ static inline int qcom_unregister_ssr_notifier(void
> *notify,
> return 0;
> }
>
> +static inline void *qcom_register_ssr_atomic_notifier(const char *name,
> + struct notifier_block *nb)
> +{
> + return 0;
qcom_register_ssr_atomic_notifier disabled stub returns integer 0 instead of
NULL
The function return type is void *. Returning integer 0 rather than NULL will
trigger a -Wint-conversion compiler warning in kernel builds (return makes
pointer from integer without a cast). The immediately preceding sibling stub
qcom_register_ssr_notifier correctly returns NULL.
Fix: return NULL;
> +}
> +
> +static inline int qcom_unregister_ssr_atomic_notifier(void *notify,
> + struct notifier_block *nb)
> +{
> + return 0;
> +}
> #endif
>
> #endif