Author: mav
Date: Sat Jul 21 21:34:10 2018
New Revision: 336590
URL: https://svnweb.freebsd.org/changeset/base/336590

Log:
  Stop further SCSI recovery attempts after one has failed.
  
  We've got a set of probably damaged hard disks, reporting 0x04,0x02
  ("Logical unit not ready, initializing command required") in response
  to READ CAPACITY(16), where attempts to use START STOP UNIT for recovery
  result in 0x44,0x00 ("Internal target failure") after a ~1 second delay.
  As a result of all the recovery retries, a device open attempt took ~3
  seconds before finally reporting to GEOM that the device is opened, but
  has no media.  If the open was for writing, then, since it hasn't formally
  failed, the following close triggered a GEOM retaste, opening the device
  a few more times with the respective delays.
  
  This change reduces the total time of this cycle from ~12 seconds to ~3 by
  giving up on recovery after the first failure.
  
  Reviewed by:  ken
  MFC after:    2 weeks
  Sponsored by: iXsystems, Inc.

Modified:
  head/sys/cam/cam_periph.c

Modified: head/sys/cam/cam_periph.c
==============================================================================
--- head/sys/cam/cam_periph.c   Sat Jul 21 21:26:38 2018        (r336589)
+++ head/sys/cam/cam_periph.c   Sat Jul 21 21:34:10 2018        (r336590)
@@ -1309,7 +1309,7 @@ camperiphdone(struct cam_periph *periph, union ccb *do
        union ccb      *saved_ccb;
        cam_status      status;
        struct scsi_start_stop_unit *scsi_cmd;
-       int    error_code, sense_key, asc, ascq;
+       int             error = 0, error_code, sense_key, asc, ascq;
 
        scsi_cmd = (struct scsi_start_stop_unit *)
            &done_ccb->csio.cdb_io.cdb_bytes;
@@ -1341,8 +1341,9 @@ camperiphdone(struct cam_periph *periph, union ccb *do
                                goto out;
                        }
                }
-               if (cam_periph_error(done_ccb,
-                   0, SF_RETRY_UA | SF_NO_PRINT) == ERESTART)
+               error = cam_periph_error(done_ccb, 0,
+                   SF_RETRY_UA | SF_NO_PRINT);
+               if (error == ERESTART)
                        goto out;
                if (done_ccb->ccb_h.status & CAM_DEV_QFRZN) {
                        cam_release_devq(done_ccb->ccb_h.path, 0, 0, 0, 0);
@@ -1361,14 +1362,21 @@ camperiphdone(struct cam_periph *periph, union ccb *do
        }
 
        /*
-        * Perform the final retry with the original CCB so that final
-        * error processing is performed by the owner of the CCB.
+        * After recovery action(s) completed, return to the original CCB.
+        * If the recovery CCB has failed, considering its own possible
+        * retries and recovery, assume we are back in state where we have
+        * been originally, but without recovery hopes left.  In such case,
+        * after the final attempt below, we cancel any further retries,
+        * blocking by that also any new recovery attempts for this CCB,
+        * and the result will be the final one returned to the CCB owner.
         */
        saved_ccb = (union ccb *)done_ccb->ccb_h.saved_ccb_ptr;
        bcopy(saved_ccb, done_ccb, sizeof(*done_ccb));
        xpt_free_ccb(saved_ccb);
        if (done_ccb->ccb_h.cbfcnp != camperiphdone)
                periph->flags &= ~CAM_PERIPH_RECOVERY_INPROG;
+       if (error != 0)
+               done_ccb->ccb_h.retry_count = 0;
        xpt_action(done_ccb);
 
 out:
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to