The remote processor reports the crash reason via the resource table
and notifies the host via a mailbox notification. The host checks this
crash reason on every mailbox notification from the remote and reports
it to the rproc core framework, which then starts the recovery
process.

Signed-off-by: Tanmay Shah <[email protected]>
---

changes in v5
  - use local variable to access crash report pointer
  - End crash report string with '\0' without relying on fw

 drivers/remoteproc/xlnx_r5_remoteproc.c | 73 ++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/drivers/remoteproc/xlnx_r5_remoteproc.c 
b/drivers/remoteproc/xlnx_r5_remoteproc.c
index 3349d1877751..86afff9f3b40 100644
--- a/drivers/remoteproc/xlnx_r5_remoteproc.c
+++ b/drivers/remoteproc/xlnx_r5_remoteproc.c
@@ -112,6 +112,10 @@ struct rsc_tbl_data {
        const uintptr_t rsc_tbl;
 } __packed;
 
+enum xlnx_rproc_fw_rsc {
+       XLNX_RPROC_FW_CRASH_REASON = RSC_VENDOR_START, /* crash report entry */
+};
+
 /*
  * Hardcoded TCM bank values. This will stay in driver to maintain backward
  * compatibility with device-tree that does not have TCM information.
@@ -131,9 +135,27 @@ static const struct mem_bank_data 
zynqmp_tcm_banks_lockstep[] = {
        {0xffe30000UL, 0x30000, 0x10000UL, PD_R5_1_BTCM, "btcm1"},
 };
 
+#define CRASH_REASON_STR_LEN 16
+
+/**
+ * struct xlnx_rproc_crash_report - resource to know crash status and reason
+ *
+ * The driver points a pointer of this type at the resource table entry of
+ * type XLNX_RPROC_FW_CRASH_REASON and reads and writes the fields in place.
+ * The structure is declared __packed.
+ *
+ * @version: version of this resource
+ * @crash_state: if true, the rproc is notifying crash, time to recover. The
+ *               driver resets it to false when the resource entry is handled
+ *               and again after the crash has been reported to the rproc core
+ * @crash_reason: number to describe reason of crash; printed by the driver
+ *                together with @crash_reason_str when a crash is reported
+ * @crash_reason_str: short string description of crash reason, stored in
+ *                    CRASH_REASON_STR_LEN bytes. The driver writes '\0' to the
+ *                    last byte before printing the string
+ */
+struct xlnx_rproc_crash_report {
+       u8 version;
+       u8 crash_state;
+       u8 crash_reason;
+       char crash_reason_str[CRASH_REASON_STR_LEN];
+} __packed;
+
 /**
  * struct zynqmp_r5_core - remoteproc core's internal data
  *
+ * @crash_report: rproc crash state and reason
  * @rsc_tbl_va: resource table virtual address
  * @sram: Array of sram memories assigned to this core
  * @num_sram: number of sram for this core
@@ -147,6 +169,7 @@ static const struct mem_bank_data 
zynqmp_tcm_banks_lockstep[] = {
  * @ipi: pointer to mailbox information
  */
 struct zynqmp_r5_core {
+       struct xlnx_rproc_crash_report *crash_report;
        void __iomem *rsc_tbl_va;
        struct zynqmp_sram_bank *sram;
        int num_sram;
@@ -204,11 +227,27 @@ static int event_notified_idr_cb(int id, void *ptr, void 
*data)
  */
 static void handle_event_notified(struct work_struct *work)
 {
+       struct xlnx_rproc_crash_report *report;
+       struct zynqmp_r5_core *r5_core;
        struct mbox_info *ipi;
        struct rproc *rproc;
 
        ipi = container_of(work, struct mbox_info, mbox_work);
        rproc = ipi->r5_core->rproc;
+       r5_core = ipi->r5_core;
+       report = r5_core->crash_report;
+
+       /* report crash only if expected */
+       if (report && report->crash_state) {
+               if (rproc->state == RPROC_ATTACHED || rproc->state == 
RPROC_RUNNING) {
+                       report->crash_reason_str[CRASH_REASON_STR_LEN - 1] = 
'\0';
+                       dev_warn(&rproc->dev, "crash reason id: %d %s\n",
+                                report->crash_reason, 
report->crash_reason_str);
+                       rproc_report_crash(rproc, RPROC_FATAL_ERROR);
+                       report->crash_state = false;
+                       return;
+               }
+       }
 
        /*
         * We only use IPI for interrupt. The RPU firmware side may or may
@@ -448,6 +487,13 @@ static int zynqmp_r5_rproc_stop(struct rproc *rproc)
        if (ret)
                dev_err(r5_core->dev, "core force power down failed\n");
 
+       /*
+        * Clear the attach-on-recovery flag on stop. The next state of the
+        * remote processor is expected to be "Running", where boot recovery
+        * must take precedence over attach on recovery.
+        */
+       test_and_clear_bit(RPROC_FEAT_ATTACH_ON_RECOVERY, rproc->features);
+
        return ret;
 }
 
@@ -869,6 +915,9 @@ static int zynqmp_r5_get_rsc_table_va(struct zynqmp_r5_core 
*r5_core)
 
 static int zynqmp_r5_attach(struct rproc *rproc)
 {
+       /* Enable attach on recovery method. Clear it during rproc stop. */
+       rproc_set_feature(rproc, RPROC_FEAT_ATTACH_ON_RECOVERY);
+
        dev_dbg(&rproc->dev, "rproc %d attached\n", rproc->index);
 
        return 0;
@@ -883,9 +932,30 @@ static int zynqmp_r5_detach(struct rproc *rproc)
         */
        zynqmp_r5_rproc_kick(rproc, 0);
 
+       clear_bit(RPROC_FEAT_ATTACH_ON_RECOVERY, rproc->features);
+
        return 0;
 }
 
+/**
+ * zynqmp_r5_handle_rsc() - handle vendor-specific resource table entries
+ * @rproc: remote processor instance that owns the resource table
+ * @rsc_type: type of the resource entry being handled
+ * @rsc: pointer to the resource entry as passed by the caller (unused here)
+ * @offset: offset of the resource entry from the start of the resource table
+ * @avail: number of bytes left in the resource table for this entry, as
+ *         reported by the caller
+ *
+ * Only XLNX_RPROC_FW_CRASH_REASON is recognised. For that type the address of
+ * the entry inside the mapped resource table (rsc_tbl_va + @offset) is stored
+ * in the core's crash_report pointer, and the crash state, crash reason and
+ * crash reason string are reset.
+ *
+ * Return: RSC_HANDLED if the crash report entry was registered, RSC_IGNORED
+ * for any other resource type or when the resource table is not mapped,
+ * -EINVAL if the entry is too small for a struct xlnx_rproc_crash_report.
+ */
+static int zynqmp_r5_handle_rsc(struct rproc *rproc, u32 rsc_type, void *rsc,
+                               int offset, int avail)
+{
+       struct zynqmp_r5_core *r5_core = rproc->priv;
+       struct xlnx_rproc_crash_report *report;
+
+       if (rsc_type != XLNX_RPROC_FW_CRASH_REASON)
+               return RSC_IGNORED;
+
+       /* Never touch bytes beyond what the table provides for this entry. */
+       if (avail < (int)sizeof(*report)) {
+               dev_err(&rproc->dev, "crash report rsc is truncated\n");
+               return -EINVAL;
+       }
+
+       /* Without a mapped table, rsc_tbl_va + offset is not a valid address. */
+       if (!r5_core->rsc_tbl_va)
+               return RSC_IGNORED;
+
+       /* The table is accessed as normal memory; drop __iomem explicitly. */
+       report = (__force void *)(r5_core->rsc_tbl_va + offset);
+
+       /* reset all values */
+       report->crash_state = false;
+       report->crash_reason = 0;
+       report->crash_reason_str[0] = '\0';
+
+       /* Publish the pointer only after the entry has been reset. */
+       r5_core->crash_report = report;
+
+       return RSC_HANDLED;
+}
+
 static const struct rproc_ops zynqmp_r5_rproc_ops = {
        .prepare        = zynqmp_r5_rproc_prepare,
        .unprepare      = zynqmp_r5_rproc_unprepare,
@@ -900,6 +970,7 @@ static const struct rproc_ops zynqmp_r5_rproc_ops = {
        .get_loaded_rsc_table = zynqmp_r5_get_loaded_rsc_table,
        .attach         = zynqmp_r5_attach,
        .detach         = zynqmp_r5_detach,
+       .handle_rsc     = zynqmp_r5_handle_rsc,
 };
 
 /**
@@ -939,7 +1010,7 @@ static struct zynqmp_r5_core 
*zynqmp_r5_alloc_rproc_core(struct device *cdev)
 
        rproc_coredump_set_elf_info(r5_rproc, ELFCLASS32, EM_ARM);
 
-       r5_rproc->recovery_disabled = true;
+       r5_rproc->recovery_disabled = false;
        r5_rproc->has_iommu = false;
        r5_rproc->auto_boot = false;
 
-- 
2.34.1


Reply via email to