On Thu, Nov 13, 2025 at 07:44:04AM -0800, Tanmay Shah wrote: > Remote processor will report the crash reason via the resource table > and notify the host via kick. The host checks this crash reason on > every kick notification from the remote and report to the core > framework. Then the rproc core framework will start the recovery > process.
Please replace the word "kick" with "mailbox notification". I also have to assume "core framework" and "rproc core framework" are the same. Pick one and stick with it. > > Signed-off-by: Tanmay Shah <[email protected]> > --- > > Changes in v2: > - clear attach recovery boot flag during detach and stop ops > > drivers/remoteproc/xlnx_r5_remoteproc.c | 56 +++++++++++++++++++++++++ > 1 file changed, 56 insertions(+) > > diff --git a/drivers/remoteproc/xlnx_r5_remoteproc.c > b/drivers/remoteproc/xlnx_r5_remoteproc.c > index 8677b732ad14..5d04e8c0dc52 100644 > --- a/drivers/remoteproc/xlnx_r5_remoteproc.c > +++ b/drivers/remoteproc/xlnx_r5_remoteproc.c > @@ -108,6 +108,10 @@ struct rsc_tbl_data { > const uintptr_t rsc_tbl; > } __packed; > > +enum fw_vendor_rsc { > + FW_RSC_VENDOR_CRASH_REASON = RSC_VENDOR_START, > +}; > + > /* > * Hardcoded TCM bank values. This will stay in driver to maintain backward > * compatibility with device-tree that does not have TCM information. > @@ -127,9 +131,21 @@ static const struct mem_bank_data > zynqmp_tcm_banks_lockstep[] = { > {0xffe30000UL, 0x30000, 0x10000UL, PD_R5_1_BTCM, "btcm1"}, > }; > > +/** > + * struct xlnx_rproc_crash_report - resource to know crash status and reason > + * > + * @crash_state: if true, the rproc is notifying crash, time to recover > + * @crash_reason: reason of crash > + */ > +struct xlnx_rproc_crash_report { > + u32 crash_state; > + u32 crash_reason; > +} __packed; > + > /** > * struct zynqmp_r5_core - remoteproc core's internal data > * > + * @crash_report: rproc crash state and reason > * @rsc_tbl_va: resource table virtual address > * @sram: Array of sram memories assigned to this core > * @num_sram: number of sram for this core > @@ -143,6 +159,7 @@ static const struct mem_bank_data > zynqmp_tcm_banks_lockstep[] = { > * @ipi: pointer to mailbox information > */ > struct zynqmp_r5_core { > + struct xlnx_rproc_crash_report *crash_report; > void __iomem *rsc_tbl_va; > struct zynqmp_sram_bank *sram; > int 
num_sram; > @@ -227,10 +244,14 @@ static void handle_event_notified(struct work_struct > *work) > static void zynqmp_r5_mb_rx_cb(struct mbox_client *cl, void *msg) > { > struct zynqmp_ipi_message *ipi_msg, *buf_msg; > + struct zynqmp_r5_core *r5_core; > + struct rproc *rproc; > struct mbox_info *ipi; > size_t len; > > ipi = container_of(cl, struct mbox_info, mbox_cl); > + r5_core = ipi->r5_core; > + rproc = r5_core->rproc; > > /* copy data from ipi buffer to r5_core */ > ipi_msg = (struct zynqmp_ipi_message *)msg; > @@ -244,6 +265,13 @@ static void zynqmp_r5_mb_rx_cb(struct mbox_client *cl, > void *msg) > buf_msg->len = len; > memcpy(buf_msg->data, ipi_msg->data, len); > > + /* Check for crash only if rproc crash is expected */ > + if (rproc->state == RPROC_ATTACHED || rproc->state == RPROC_RUNNING) { > + if (r5_core->crash_report->crash_state) > + rproc_report_crash(rproc, > + r5_core->crash_report->crash_reason); At this stage ->crash_state indicates that a crash occurred, but how is it reset once the crash has been handled? How do we make sure the next mailbox notification won't trigger another crash report? 
> + } > + > /* received and processed interrupt ack */ > if (mbox_send_message(ipi->rx_chan, NULL) < 0) > dev_err(cl->dev, "ack failed to mbox rx_chan\n"); > @@ -397,6 +425,7 @@ static int zynqmp_r5_rproc_start(struct rproc *rproc) > if (ret) > dev_err(r5_core->dev, > "failed to start RPU = 0x%x\n", r5_core->pm_domain_id); > + Spurious change > return ret; > } > > @@ -438,6 +467,8 @@ static int zynqmp_r5_rproc_stop(struct rproc *rproc) > if (ret) > dev_err(r5_core->dev, "core force power down failed\n"); > > + test_and_clear_bit(RPROC_FEAT_ATTACH_ON_RECOVERY, rproc->features); > + > return ret; > } > > @@ -874,6 +905,8 @@ static int zynqmp_r5_get_rsc_table_va(struct > zynqmp_r5_core *r5_core) > > static int zynqmp_r5_attach(struct rproc *rproc) > { > + rproc_set_feature(rproc, RPROC_FEAT_ATTACH_ON_RECOVERY); > + Why can't this be set in probe() and left alone from thereon? > dev_dbg(&rproc->dev, "rproc %d attached\n", rproc->index); > > return 0; > @@ -888,6 +921,8 @@ static int zynqmp_r5_detach(struct rproc *rproc) > */ > zynqmp_r5_rproc_kick(rproc, 0); > > + clear_bit(RPROC_FEAT_ATTACH_ON_RECOVERY, rproc->features); > + I'm not sure why this needs to be done, same comment for zynqmp_r5_rproc_stop(). > return 0; > } > > @@ -896,6 +931,26 @@ static void zynqmp_r5_coredump(struct rproc *rproc) > (void)rproc; > } > > +static int zynqmp_r5_handle_crash_rsc(struct rproc *rproc, void *rsc, > + int offset, int avail) > +{ > + struct zynqmp_r5_core *r5_core = rproc->priv; > + > + r5_core->crash_report = > + (struct xlnx_rproc_crash_report *)(r5_core->rsc_tbl_va + > offset); > + This function is so simple that I would fold it in zynqmp_r5_handle_rsc() below. 
Thanks, Mathieu > + return RSC_HANDLED; > +} > + > +static int zynqmp_r5_handle_rsc(struct rproc *rproc, u32 rsc_type, void *rsc, > + int offset, int avail) > +{ > + if (rsc_type == FW_RSC_VENDOR_CRASH_REASON) > + return zynqmp_r5_handle_crash_rsc(rproc, rsc, offset, avail); > + > + return RSC_IGNORED; > +} > + > static const struct rproc_ops zynqmp_r5_rproc_ops = { > .prepare = zynqmp_r5_rproc_prepare, > .unprepare = zynqmp_r5_rproc_unprepare, > @@ -911,6 +966,7 @@ static const struct rproc_ops zynqmp_r5_rproc_ops = { > .attach = zynqmp_r5_attach, > .detach = zynqmp_r5_detach, > .coredump = zynqmp_r5_coredump, > + .handle_rsc = zynqmp_r5_handle_rsc, > }; > > /** > -- > 2.34.1 >

