On Fri, Dec 24, 2010 at 16:09 +1000, David Gwynne wrote: > i can reliably produce a situation where an io on a disk attached > to mpii(4) never completes. this implements timeouts on scsi io so > we can recover from this situation. > > ok? >
although, you've already committed the change, i have two questions regarding this. please find them inline. > Index: mpii.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/mpii.c,v > retrieving revision 1.35 > diff -u -p -r1.35 mpii.c > --- mpii.c 23 Aug 2010 00:53:36 -0000 1.35 > +++ mpii.c 24 Dec 2010 06:04:38 -0000 > @@ -4448,6 +4466,7 @@ mpii_scsi_cmd(struct scsi_xfer *xs) > DNPRINTF(MPII_D_CMD, "%s: Offset0: 0x%02x\n", DEVNAME(sc), > io->sgl_offset0); > > + timeout_set(&xs->stimeout, mpii_scsi_cmd_tmo, ccb); > if (xs->flags & SCSI_POLL) { > if (mpii_poll(sc, ccb) != 0) { > xs->error = XS_DRIVER_STUFFUP; > @@ -4459,10 +4478,66 @@ mpii_scsi_cmd(struct scsi_xfer *xs) > DNPRINTF(MPII_D_CMD, "%s: mpii_scsi_cmd(): opcode: %02x " > "datalen: %d\n", DEVNAME(sc), xs->cmd->opcode, xs->datalen); > > + timeout_add_msec(&xs->stimeout, xs->timeout); > mpii_start(sc, ccb); > } > > void > +mpii_scsi_cmd_tmo(void *xccb) > +{ > + struct mpii_ccb *ccb = xccb; > + struct mpii_softc *sc = ccb->ccb_sc; > + > + printf("%s: mpii_scsi_cmd_tmo\n", DEVNAME(sc)); > + > + mtx_enter(&sc->sc_ccb_mtx); > + if (ccb->ccb_state == MPII_CCB_QUEUED) { > + ccb->ccb_state = MPII_CCB_TIMEOUT; > + SLIST_INSERT_HEAD(&sc->sc_ccb_tmos, ccb, ccb_link); > + } > + mtx_leave(&sc->sc_ccb_mtx); > + > + scsi_ioh_add(&sc->sc_ccb_tmo_handler); > +} > + > +void > +mpii_scsi_cmd_tmo_handler(void *cookie, void *io) > +{ > + struct mpii_softc *sc = cookie; > + struct mpii_ccb *tccb = io; > + struct mpii_ccb *ccb; > + struct mpii_msg_scsi_task_request *stq; > + > + mtx_enter(&sc->sc_ccb_mtx); > + ccb = SLIST_FIRST(&sc->sc_ccb_tmos); > + if (ccb != NULL) { > + SLIST_REMOVE_HEAD(&sc->sc_ccb_tmos, ccb_link); > + ccb->ccb_state = MPII_CCB_QUEUED; > + } > + /* should remove any other ccbs for the same dev handle */ > + mtx_leave(&sc->sc_ccb_mtx); > + > + if (ccb == NULL) { > + scsi_io_put(&sc->sc_iopool, tccb); > + return; > + } > + > + stq = tccb->ccb_cmd; > + stq->function = MPII_FUNCTION_SCSI_TASK_MGMT; > + stq->task_type = MPII_SCSI_TASK_TARGET_RESET; > + stq->dev_handle = htole16(ccb->ccb_dev_handle); > + why do you issue 'target reset' instead of 'abort task' here? > + tccb->ccb_done = mpii_scsi_cmd_tmo_done; > + mpii_start(sc, tccb); > +} > + > +void > +mpii_scsi_cmd_tmo_done(struct mpii_ccb *tccb) > +{ > + mpii_scsi_cmd_tmo_handler(tccb->ccb_sc, tccb); > +} > + why do you call this function again from here?