Mark reports that if a previous firmware update is blocked due to a
background ARS then ndctl fails to start another firmware-update
request until the platform is rebooted.

Teach 'ndctl update-firmware' to abort previous firmware-update sessions
when '--force' is specified.

Link: https://github.com/pmem/ndctl/issues/155
Link: http://lore.kernel.org/r/[email protected]
Reported-by: Mark Baker <[email protected]>
Tested-by: Mark Baker <[email protected]>
Tested-by: Jane Chu <[email protected]>
Signed-off-by: Dan Williams <[email protected]>
---

Needs the fix from Jane mentioned in the link above, but with that
included, Jane and Mark report this works.

 ndctl/dimm.c |  109 ++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 67 insertions(+), 42 deletions(-)

diff --git a/ndctl/dimm.c b/ndctl/dimm.c
index 8e85d692afd3..167c3f1bc7c7 100644
--- a/ndctl/dimm.c
+++ b/ndctl/dimm.c
@@ -504,6 +504,36 @@ out:
        return rc;
 }
 
+static int submit_abort_firmware(struct ndctl_dimm *dimm,
+               struct action_context *actx)
+{
+       struct update_context *uctx = &actx->update;
+       struct ndctl_cmd *cmd;
+       int rc;
+       enum ND_FW_STATUS status;
+
+       cmd = ndctl_dimm_cmd_new_fw_abort(uctx->start);
+       if (!cmd)
+               return -ENXIO;
+
+       rc = ndctl_cmd_submit(cmd);
+       if (rc < 0)
+               goto out;
+
+       status = ndctl_cmd_fw_xlat_firmware_status(cmd);
+       if (!(status & ND_CMD_STATUS_FIN_ABORTED)) {
+               fprintf(stderr,
+                       "Firmware update abort on DIMM %s failed: %#x\n",
+                       ndctl_dimm_get_devname(dimm), status);
+               rc = -ENXIO;
+               goto out;
+       }
+
+out:
+       ndctl_cmd_unref(cmd);
+       return rc;
+}
+
 static int submit_start_firmware_upload(struct ndctl_dimm *dimm,
                struct action_context *actx)
 {
@@ -511,8 +541,8 @@ static int submit_start_firmware_upload(struct ndctl_dimm *dimm,
        struct update_context *uctx = &actx->update;
        struct fw_info *fw = &uctx->dimm_fw;
        struct ndctl_cmd *cmd;
-       int rc;
        enum ND_FW_STATUS status;
+       int rc;
 
        cmd = ndctl_dimm_cmd_new_fw_start_update(dimm);
        if (!cmd)
@@ -520,27 +550,46 @@ static int submit_start_firmware_upload(struct ndctl_dimm *dimm,
 
        rc = ndctl_cmd_submit(cmd);
        if (rc < 0)
-               return rc;
+               goto err;
 
+       uctx->start = cmd;
        status = ndctl_cmd_fw_xlat_firmware_status(cmd);
        if (status == FW_EBUSY) {
-               err("%s: busy with another firmware update", devname);
-               return -EBUSY;
+               if (param.force) {
+                       rc = submit_abort_firmware(dimm, actx);
+                       if (rc < 0) {
+                               err("%s: busy with another firmware update, "
+                                   "abort failed", devname);
+                               rc = -EBUSY;
+                               goto err;
+                       }
+                       rc = -EAGAIN;
+                       goto err;
+               } else {
+                       err("%s: busy with another firmware update", devname);
+                       rc = -EBUSY;
+                       goto err;
+               }
        }
        if (status != FW_SUCCESS) {
                err("%s: failed to create start context", devname);
-               return -ENXIO;
+               rc = -ENXIO;
+               goto err;
        }
 
        fw->context = ndctl_cmd_fw_start_get_context(cmd);
        if (fw->context == UINT_MAX) {
                err("%s: failed to retrieve start context", devname);
-               return -ENXIO;
+               rc = -ENXIO;
+               goto err;
        }
 
-       uctx->start = cmd;
-
        return 0;
+
+err:
+       uctx->start = NULL;
+       ndctl_cmd_unref(cmd);
+       return rc;
 }
 
 static int get_fw_data_from_file(FILE *file, void *buf, uint32_t len)
@@ -659,36 +708,6 @@ out:
        return rc;
 }
 
-static int submit_abort_firmware(struct ndctl_dimm *dimm,
-               struct action_context *actx)
-{
-       struct update_context *uctx = &actx->update;
-       struct ndctl_cmd *cmd;
-       int rc;
-       enum ND_FW_STATUS status;
-
-       cmd = ndctl_dimm_cmd_new_fw_abort(uctx->start);
-       if (!cmd)
-               return -ENXIO;
-
-       rc = ndctl_cmd_submit(cmd);
-       if (rc < 0)
-               goto out;
-
-       status = ndctl_cmd_fw_xlat_firmware_status(cmd);
-       if (!(status & ND_CMD_STATUS_FIN_ABORTED)) {
-               fprintf(stderr,
-                       "Firmware update abort on DIMM %s failed: %#x\n",
-                       ndctl_dimm_get_devname(dimm), status);
-               rc = -ENXIO;
-               goto out;
-       }
-
-out:
-       ndctl_cmd_unref(cmd);
-       return rc;
-}
-
 static enum ndctl_fwa_state fw_update_arm(struct ndctl_dimm *dimm)
 {
        struct ndctl_bus *bus = ndctl_dimm_get_bus(dimm);
@@ -856,15 +875,21 @@ static int update_firmware(struct ndctl_dimm *dimm,
                struct action_context *actx)
 {
        const char *devname = ndctl_dimm_get_devname(dimm);
-       int rc;
+       int rc, i;
 
        rc = submit_get_firmware_info(dimm, actx);
        if (rc < 0)
                return rc;
 
-       rc = submit_start_firmware_upload(dimm, actx);
-       if (rc < 0)
-               return rc;
+       /* try a few times in the --force and state busy case */
+       for (i = 0; i < 3; i++) {
+               rc = submit_start_firmware_upload(dimm, actx);
+               if (rc == -EAGAIN)
+                       continue;
+               if (rc < 0)
+                       return rc;
+               break;
+       }
 
        if (param.verbose)
                fprintf(stderr, "%s: uploading firmware\n", devname);
_______________________________________________
Linux-nvdimm mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to