Re: timeout io on mpii(4)
On Fri, Dec 24, 2010 at 16:09 +1000, David Gwynne wrote: i can reliably produce a situation where an io on a disk attached to mpii(4) never completes. this implements timeouts on scsi io so we can recover from this situation. ok? although, you've already committed the change, i have two questions regarding this. please find them inline. Index: mpii.c === RCS file: /cvs/src/sys/dev/pci/mpii.c,v retrieving revision 1.35 diff -u -p -r1.35 mpii.c --- mpii.c23 Aug 2010 00:53:36 - 1.35 +++ mpii.c24 Dec 2010 06:04:38 - @@ -4448,6 +4466,7 @@ mpii_scsi_cmd(struct scsi_xfer *xs) DNPRINTF(MPII_D_CMD, %s: Offset0: 0x%02x\n, DEVNAME(sc), io-sgl_offset0); + timeout_set(xs-stimeout, mpii_scsi_cmd_tmo, ccb); if (xs-flags SCSI_POLL) { if (mpii_poll(sc, ccb) != 0) { xs-error = XS_DRIVER_STUFFUP; @@ -4459,10 +4478,66 @@ mpii_scsi_cmd(struct scsi_xfer *xs) DNPRINTF(MPII_D_CMD, %s:mpii_scsi_cmd(): opcode: %02x datalen: %d\n, DEVNAME(sc), xs-cmd-opcode, xs-datalen); + timeout_add_msec(xs-stimeout, xs-timeout); mpii_start(sc, ccb); } void +mpii_scsi_cmd_tmo(void *xccb) +{ + struct mpii_ccb *ccb = xccb; + struct mpii_softc *sc = ccb-ccb_sc; + + printf(%s: mpii_scsi_cmd_tmo\n, DEVNAME(sc)); + + mtx_enter(sc-sc_ccb_mtx); + if (ccb-ccb_state == MPII_CCB_QUEUED) { + ccb-ccb_state = MPII_CCB_TIMEOUT; + SLIST_INSERT_HEAD(sc-sc_ccb_tmos, ccb, ccb_link); + } + mtx_leave(sc-sc_ccb_mtx); + + scsi_ioh_add(sc-sc_ccb_tmo_handler); +} + +void +mpii_scsi_cmd_tmo_handler(void *cookie, void *io) +{ + struct mpii_softc *sc = cookie; + struct mpii_ccb *tccb = io; + struct mpii_ccb *ccb; + struct mpii_msg_scsi_task_request *stq; + + mtx_enter(sc-sc_ccb_mtx); + ccb = SLIST_FIRST(sc-sc_ccb_tmos); + if (ccb != NULL) { + SLIST_REMOVE_HEAD(sc-sc_ccb_tmos, ccb_link); + ccb-ccb_state = MPII_CCB_QUEUED; + } + /* should remove any other ccbs for the same dev handle */ + mtx_leave(sc-sc_ccb_mtx); + + if (ccb == NULL) { + scsi_io_put(sc-sc_iopool, tccb); + return; + } + + stq = tccb-ccb_cmd; + stq-function = MPII_FUNCTION_SCSI_TASK_MGMT; + stq-task_type = MPII_SCSI_TASK_TARGET_RESET; + stq-dev_handle = htole16(ccb-ccb_dev_handle); + why do you issue 'target reset' instead of 'abort task' here? + tccb-ccb_done = mpii_scsi_cmd_tmo_done; + mpii_start(sc, tccb); +} + +void +mpii_scsi_cmd_tmo_done(struct mpii_ccb *tccb) +{ + mpii_scsi_cmd_tmo_handler(tccb-ccb_sc, tccb); +} + why do you call this function again from here?
Re: timeout io on mpii(4)
On 31/12/2010, at 3:51 AM, Mike Belopuhov wrote: On Fri, Dec 24, 2010 at 16:09 +1000, David Gwynne wrote: i can reliably produce a situation where an io on a disk attached to mpii(4) never completes. this implements timeouts on scsi io so we can recover from this situation. ok? although, you've already committed the change, i have two questions regarding this. please find them inline. cool :) answers inline too. Index: mpii.c === RCS file: /cvs/src/sys/dev/pci/mpii.c,v retrieving revision 1.35 diff -u -p -r1.35 mpii.c --- mpii.c 23 Aug 2010 00:53:36 - 1.35 +++ mpii.c 24 Dec 2010 06:04:38 - @@ -4448,6 +4466,7 @@ mpii_scsi_cmd(struct scsi_xfer *xs) DNPRINTF(MPII_D_CMD, %s: Offset0: 0x%02x\n, DEVNAME(sc), io-sgl_offset0); +timeout_set(xs-stimeout, mpii_scsi_cmd_tmo, ccb); if (xs-flags SCSI_POLL) { if (mpii_poll(sc, ccb) != 0) { xs-error = XS_DRIVER_STUFFUP; @@ -4459,10 +4478,66 @@ mpii_scsi_cmd(struct scsi_xfer *xs) DNPRINTF(MPII_D_CMD, %s:mpii_scsi_cmd(): opcode: %02x datalen: %d\n, DEVNAME(sc), xs-cmd-opcode, xs-datalen); +timeout_add_msec(xs-stimeout, xs-timeout); mpii_start(sc, ccb); } void +mpii_scsi_cmd_tmo(void *xccb) +{ +struct mpii_ccb *ccb = xccb; +struct mpii_softc *sc = ccb-ccb_sc; + +printf(%s: mpii_scsi_cmd_tmo\n, DEVNAME(sc)); + +mtx_enter(sc-sc_ccb_mtx); +if (ccb-ccb_state == MPII_CCB_QUEUED) { +ccb-ccb_state = MPII_CCB_TIMEOUT; +SLIST_INSERT_HEAD(sc-sc_ccb_tmos, ccb, ccb_link); +} +mtx_leave(sc-sc_ccb_mtx); + +scsi_ioh_add(sc-sc_ccb_tmo_handler); +} + +void +mpii_scsi_cmd_tmo_handler(void *cookie, void *io) +{ +struct mpii_softc *sc = cookie; +struct mpii_ccb *tccb = io; +struct mpii_ccb *ccb; +struct mpii_msg_scsi_task_request *stq; + +mtx_enter(sc-sc_ccb_mtx); +ccb = SLIST_FIRST(sc-sc_ccb_tmos); +if (ccb != NULL) { +SLIST_REMOVE_HEAD(sc-sc_ccb_tmos, ccb_link); +ccb-ccb_state = MPII_CCB_QUEUED; +} +/* should remove any other ccbs for the same dev handle */ +mtx_leave(sc-sc_ccb_mtx); + +if (ccb == NULL) { +scsi_io_put(sc-sc_iopool, tccb); +return; +} + +stq = tccb-ccb_cmd; +stq-function = MPII_FUNCTION_SCSI_TASK_MGMT; +stq-task_type = MPII_SCSI_TASK_TARGET_RESET; +stq-dev_handle = htole16(ccb-ccb_dev_handle); + why do you issue 'target reset' instead of 'abort task' here? thats what solaris and linux do. +tccb-ccb_done = mpii_scsi_cmd_tmo_done; +mpii_start(sc, tccb); +} + +void +mpii_scsi_cmd_tmo_done(struct mpii_ccb *tccb) +{ +mpii_scsi_cmd_tmo_handler(tccb-ccb_sc, tccb); +} + why do you call this function again from here? xs timeouts put the xs on a list to be serviced. the io handler services that list. the done function calling the handler again is the way it pulls the next one off the list. dlg
Re: timeout io on mpii(4)
This is great but the real question is why does the IO get jammed? On Fri, Dec 24, 2010 at 04:09:23PM +1000, David Gwynne wrote: i can reliably produce a situation where an io on a disk attached to mpii(4) never completes. this implements timeouts on scsi io so we can recover from this situation. ok? Index: mpii.c === RCS file: /cvs/src/sys/dev/pci/mpii.c,v retrieving revision 1.35 diff -u -p -r1.35 mpii.c --- mpii.c23 Aug 2010 00:53:36 - 1.35 +++ mpii.c24 Dec 2010 06:04:38 - @@ -1757,7 +1757,8 @@ struct mpii_ccb { volatile enum { MPII_CCB_FREE, MPII_CCB_READY, - MPII_CCB_QUEUED + MPII_CCB_QUEUED, + MPII_CCB_TIMEOUT } ccb_state; void(*ccb_done)(struct mpii_ccb *); @@ -1822,6 +1823,15 @@ struct mpii_softc { struct mpii_ccb_listsc_ccb_free; struct mutexsc_ccb_free_mtx; + struct mutexsc_ccb_mtx; + /* + * this protects the ccb state and list entry + * between mpii_scsi_cmd and scsidone. + */ + + struct mpii_ccb_listsc_ccb_tmos; + struct scsi_iohandler sc_ccb_tmo_handler; + struct scsi_iopool sc_iopool; struct mpii_dmamem *sc_requests; @@ -1894,6 +1904,10 @@ intmpii_alloc_queues(struct mpii_softc void mpii_push_reply(struct mpii_softc *, struct mpii_rcb *); void mpii_push_replies(struct mpii_softc *); +void mpii_scsi_cmd_tmo(void *); +void mpii_scsi_cmd_tmo_handler(void *, void *); +void mpii_scsi_cmd_tmo_done(struct mpii_ccb *); + int mpii_alloc_dev(struct mpii_softc *); int mpii_insert_dev(struct mpii_softc *, struct mpii_device *); int mpii_remove_dev(struct mpii_softc *, struct mpii_device *); @@ -4035,7 +4049,11 @@ mpii_alloc_ccbs(struct mpii_softc *sc) int i; SLIST_INIT(sc-sc_ccb_free); + SLIST_INIT(sc-sc_ccb_tmos); mtx_init(sc-sc_ccb_free_mtx, IPL_BIO); + mtx_init(sc-sc_ccb_mtx, IPL_BIO); + scsi_ioh_set(sc-sc_ccb_tmo_handler, sc-sc_iopool, + mpii_scsi_cmd_tmo_handler, sc); sc-sc_ccbs = malloc(sizeof(*ccb) * (sc-sc_request_depth-1), M_DEVBUF, M_NOWAIT | M_ZERO); @@ -4448,6 +4466,7 @@ mpii_scsi_cmd(struct scsi_xfer *xs) DNPRINTF(MPII_D_CMD, %s: Offset0: 0x%02x\n, DEVNAME(sc), io-sgl_offset0); + timeout_set(xs-stimeout, mpii_scsi_cmd_tmo, ccb); if (xs-flags SCSI_POLL) { if (mpii_poll(sc, ccb) != 0) { xs-error = XS_DRIVER_STUFFUP; @@ -4459,10 +4478,66 @@ mpii_scsi_cmd(struct scsi_xfer *xs) DNPRINTF(MPII_D_CMD, %s:mpii_scsi_cmd(): opcode: %02x datalen: %d\n, DEVNAME(sc), xs-cmd-opcode, xs-datalen); + timeout_add_msec(xs-stimeout, xs-timeout); mpii_start(sc, ccb); } void +mpii_scsi_cmd_tmo(void *xccb) +{ + struct mpii_ccb *ccb = xccb; + struct mpii_softc *sc = ccb-ccb_sc; + + printf(%s: mpii_scsi_cmd_tmo\n, DEVNAME(sc)); + + mtx_enter(sc-sc_ccb_mtx); + if (ccb-ccb_state == MPII_CCB_QUEUED) { + ccb-ccb_state = MPII_CCB_TIMEOUT; + SLIST_INSERT_HEAD(sc-sc_ccb_tmos, ccb, ccb_link); + } + mtx_leave(sc-sc_ccb_mtx); + + scsi_ioh_add(sc-sc_ccb_tmo_handler); +} + +void +mpii_scsi_cmd_tmo_handler(void *cookie, void *io) +{ + struct mpii_softc *sc = cookie; + struct mpii_ccb *tccb = io; + struct mpii_ccb *ccb; + struct mpii_msg_scsi_task_request *stq; + + mtx_enter(sc-sc_ccb_mtx); + ccb = SLIST_FIRST(sc-sc_ccb_tmos); + if (ccb != NULL) { + SLIST_REMOVE_HEAD(sc-sc_ccb_tmos, ccb_link); + ccb-ccb_state = MPII_CCB_QUEUED; + } + /* should remove any other ccbs for the same dev handle */ + mtx_leave(sc-sc_ccb_mtx); + + if (ccb == NULL) { + scsi_io_put(sc-sc_iopool, tccb); + return; + } + + stq = tccb-ccb_cmd; + stq-function = MPII_FUNCTION_SCSI_TASK_MGMT; + stq-task_type = MPII_SCSI_TASK_TARGET_RESET; + stq-dev_handle = htole16(ccb-ccb_dev_handle); + + tccb-ccb_done = mpii_scsi_cmd_tmo_done; + mpii_start(sc, tccb); +} + +void +mpii_scsi_cmd_tmo_done(struct mpii_ccb *tccb) +{ + mpii_scsi_cmd_tmo_handler(tccb-ccb_sc, tccb); +} + +void mpii_scsi_cmd_done(struct mpii_ccb *ccb) { struct mpii_msg_scsi_io_error *sie; @@ -4470,6 +4545,14 @@ mpii_scsi_cmd_done(struct mpii_ccb *ccb) struct scsi_xfer*xs = ccb-ccb_cookie; struct