On 10/14/2010 10:30 AM, Zou, Yi wrote: >> >> Use the rport value for rec_tov for timeout values when >> sending fcp commands. Currently, defaults are being used >> which may or may not match the advertised values. >> >> The default may cause i/o to timeout on networks that >> set this value larger then the default value. To make >> the timeout more configurable in the non-REC mode we >> remove the FC_SCSI_ER_TIMEOUT completely allowing the >> scsi-ml to do the timeout. This removes an unneeded >> timer and allows the i/o timeout to be configured >> using the scsi-ml knobs. > I believe FC_SCSI_ER_TIMEOUT is for recovery abort so we > return DID_BUS_BUSY to allow the same scsi cmmd to be > requeued when there is no REC. Taking this out, plus your > patch 1/3 of always returning DID_ABORT/DID_ERROR basically > removes recovery abort in fc_fcp. >
This removes recovery in fc_fcp however by returning correct DID_xyz error codes we should be able to handle the recovery through the scsi-ml ie scsi will requeue the scsi cmd. The cmd will be requeued after the scsi layer timeout has occured and if the scsi max_retries is hit the cmd will be aborted and fc_eh_abort will be called. >From my point of view this allows the user to use the scsi knobs to control the timeout values and simplifies the libfc code. As a reference point this investigation started with i/o timing out on busy networks and mpio enabling fail fast was causing many aborts to occur and paths to failed. Without a knob to tune the timeout value we can't account for networks that consistently respond in >10s. > However, I don't understand why this is 10s to start with. > The default sd timeout is 30s, so it seems before we are able > to max of scmd->allowed retry through the recovery abort by > FC_CMD_RECOVERY, we will hit lun_reset that would eventually > still return DID_ERROR to scsi-ml, i.e., failing that i/o > in the 3rd retry. However, I also did not find any value in > the fcp3 spec about how long to wait to do recovery abort when > sequence level recovery is not available, anyone on the list > may have a better idea? I don't think the timeout is needed, if you want the same 10s behavior set the scsi timeout values accordingly. Although I could be missing something, I am unsure why the timer was added originally. Thanks, John. > > Thanks, > yi > >> >> Signed-off-by: John Fastabend <[email protected]> >> --- >> >> drivers/scsi/libfc/fc_fcp.c | 58 +++++++++++++++++++++++++++++++------- >> ----- >> 1 files changed, 42 insertions(+), 16 deletions(-) >> >> diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c >> index 782f221..b90cefc 100644 >> --- a/drivers/scsi/libfc/fc_fcp.c >> +++ b/drivers/scsi/libfc/fc_fcp.c >> @@ -57,6 +57,9 @@ struct kmem_cache *scsi_pkt_cachep; >> #define FC_SRB_READ (1 << 1) >> #define FC_SRB_WRITE (1 << 0) >> >> +/* constant added to e_d_tov timeout to get rec_tov value */ >> +#define REC_TOV_CONST 1 >> + >> /* >> * The SCp.ptr should be tested and set under the scsi_pkt_queue lock >> */ >> @@ -126,9 +129,7 @@ static void fc_fcp_srr_error(struct fc_fcp_pkt *, >> struct fc_frame *); >> /* >> * Error recovery timeout values. >> */ >> -#define FC_SCSI_ER_TIMEOUT (10 * HZ) >> #define FC_SCSI_TM_TOV (10 * HZ) >> -#define FC_SCSI_REC_TOV (2 * HZ) >> #define FC_HOST_RESET_TIMEOUT (30 * HZ) >> #define FC_CAN_QUEUE_PERIOD (60 * HZ) >> >> @@ -1083,6 +1084,21 @@ static int fc_fcp_pkt_send(struct fc_lport *lport, >> struct fc_fcp_pkt *fsp) >> } >> >> /** >> + * get_fsp_rec_tov() - Helper function to get REC_TOV >> + * @fsp: the FCP packet >> + */ >> +static inline unsigned int get_fsp_rec_tov(struct fc_fcp_pkt *fsp) >> +{ >> + struct fc_rport *rport; >> + struct fc_rport_libfc_priv *rpriv; >> + >> + rport = fsp->rport; >> + rpriv = rport->dd_data; >> + >> + return rpriv->e_d_tov + REC_TOV_CONST; >> +} >> + >> +/** >> * fc_fcp_cmd_send() - Send a FCP command >> * @lport: The local port to send the command on >> * @fsp: The FCP packet the command is on >> @@ -1099,6 +1115,7 @@ static int fc_fcp_cmd_send(struct fc_lport *lport, >> struct fc_fcp_pkt *fsp, >> struct fc_rport_libfc_priv *rpriv; >> const size_t len = sizeof(fsp->cdb_cmd); >> int rc = 0; >> + unsigned int rec_tov; >> >> if (fc_fcp_lock_pkt(fsp)) >> return 0; >> @@ -1129,10 +1146,12 @@ static int fc_fcp_cmd_send(struct fc_lport *lport, >> struct fc_fcp_pkt *fsp, >> fsp->seq_ptr = seq; >> fc_fcp_pkt_hold(fsp); /* hold for fc_fcp_pkt_destroy */ >> >> + rec_tov = get_fsp_rec_tov(fsp); >> + >> setup_timer(&fsp->timer, fc_fcp_timeout, (unsigned long)fsp); >> - fc_fcp_timer_set(fsp, >> - (fsp->tgt_flags & FC_RP_FLAGS_REC_SUPPORTED) ? >> - FC_SCSI_REC_TOV : FC_SCSI_ER_TIMEOUT); >> + >> + if (fsp->tgt_flags & FC_RP_FLAGS_REC_SUPPORTED) >> + fc_fcp_timer_set(fsp, rec_tov); >> unlock: >> fc_fcp_unlock_pkt(fsp); >> return rc; >> @@ -1207,13 +1226,16 @@ static void fc_lun_reset_send(unsigned long data) >> { >> struct fc_fcp_pkt *fsp = (struct fc_fcp_pkt *)data; >> struct fc_lport *lport = fsp->lp; >> + unsigned int rec_tov; >> + >> if (lport->tt.fcp_cmd_send(lport, fsp, fc_tm_done)) { >> if (fsp->recov_retry++ >= FC_MAX_RECOV_RETRY) >> return; >> if (fc_fcp_lock_pkt(fsp)) >> return; >> + rec_tov = get_fsp_rec_tov(fsp); >> setup_timer(&fsp->timer, fc_lun_reset_send, (unsigned >> long)fsp); >> - fc_fcp_timer_set(fsp, FC_SCSI_REC_TOV); >> + fc_fcp_timer_set(fsp, rec_tov); >> fc_fcp_unlock_pkt(fsp); >> } >> } >> @@ -1351,9 +1373,6 @@ static void fc_fcp_timeout(unsigned long data) >> >> if (rpriv->flags & FC_RP_FLAGS_REC_SUPPORTED) >> fc_fcp_rec(fsp); >> - else if (time_after_eq(fsp->last_pkt_time + (FC_SCSI_ER_TIMEOUT / 2), >> - jiffies)) >> - fc_fcp_timer_set(fsp, FC_SCSI_ER_TIMEOUT); >> else if (fsp->state & FC_SRB_RCV_STATUS) >> fc_fcp_complete_locked(fsp); >> else >> @@ -1373,6 +1392,7 @@ static void fc_fcp_rec(struct fc_fcp_pkt *fsp) >> struct fc_frame *fp; >> struct fc_rport *rport; >> struct fc_rport_libfc_priv *rpriv; >> + unsigned int rec_tov; >> >> lport = fsp->lp; >> rport = fsp->rport; >> @@ -1383,6 +1403,9 @@ static void fc_fcp_rec(struct fc_fcp_pkt *fsp) >> fc_fcp_complete_locked(fsp); >> return; >> } >> + >> + rec_tov = get_fsp_rec_tov(fsp); >> + >> fp = fc_fcp_frame_alloc(lport, sizeof(struct fc_els_rec)); >> if (!fp) >> goto retry; >> @@ -1393,13 +1416,13 @@ static void fc_fcp_rec(struct fc_fcp_pkt *fsp) >> FC_FCTL_REQ, 0); >> if (lport->tt.elsct_send(lport, rport->port_id, fp, ELS_REC, >> fc_fcp_rec_resp, fsp, >> - jiffies_to_msecs(FC_SCSI_REC_TOV))) { >> + jiffies_to_msecs(rec_tov))) { >> fc_fcp_pkt_hold(fsp); /* hold while REC outstanding >> */ >> return; >> } >> retry: >> if (fsp->recov_retry++ < FC_MAX_RECOV_RETRY) >> - fc_fcp_timer_set(fsp, FC_SCSI_REC_TOV); >> + fc_fcp_timer_set(fsp, rec_tov); >> else >> fc_fcp_recovery(fsp, FC_TIMED_OUT); >> } >> @@ -1455,7 +1478,6 @@ static void fc_fcp_rec_resp(struct fc_seq *seq, >> struct fc_frame *fp, void *arg) >> * making progress. >> */ >> rpriv->flags &= ~FC_RP_FLAGS_REC_SUPPORTED; >> - fc_fcp_timer_set(fsp, FC_SCSI_ER_TIMEOUT); >> break; >> case ELS_RJT_LOGIC: >> case ELS_RJT_UNAB: >> @@ -1508,12 +1530,12 @@ static void fc_fcp_rec_resp(struct fc_seq *seq, >> struct fc_frame *fp, void *arg) >> } >> fc_fcp_srr(fsp, r_ctl, offset); >> } else if (e_stat & ESB_ST_SEQ_INIT) { >> - >> + unsigned int rec_tov = get_fsp_rec_tov(fsp); >> /* >> * The remote port has the initiative, so just >> * keep waiting for it to complete. >> */ >> - fc_fcp_timer_set(fsp, FC_SCSI_REC_TOV); >> + fc_fcp_timer_set(fsp, rec_tov); >> } else { >> >> /* >> @@ -1626,6 +1648,7 @@ static void fc_fcp_srr(struct fc_fcp_pkt *fsp, enum >> fc_rctl r_ctl, u32 offset) >> struct fcp_srr *srr; >> struct fc_frame *fp; >> u8 cdb_op; >> + unsigned int rec_tov; >> >> rport = fsp->rport; >> rpriv = rport->dd_data; >> @@ -1650,8 +1673,9 @@ static void fc_fcp_srr(struct fc_fcp_pkt *fsp, enum >> fc_rctl r_ctl, u32 offset) >> rpriv->local_port->port_id, FC_TYPE_FCP, >> FC_FCTL_REQ, 0); >> >> + rec_tov = get_fsp_rec_tov(fsp); >> seq = lport->tt.exch_seq_send(lport, fp, fc_fcp_srr_resp, NULL, >> - fsp, jiffies_to_msecs(FC_SCSI_REC_TOV)); >> + fsp, jiffies_to_msecs(rec_tov)); >> if (!seq) >> goto retry; >> >> @@ -1675,6 +1699,7 @@ static void fc_fcp_srr_resp(struct fc_seq *seq, >> struct fc_frame *fp, void *arg) >> { >> struct fc_fcp_pkt *fsp = arg; >> struct fc_frame_header *fh; >> + unsigned int rec_tov; >> >> if (IS_ERR(fp)) { >> fc_fcp_srr_error(fsp, fp); >> @@ -1701,7 +1726,8 @@ static void fc_fcp_srr_resp(struct fc_seq *seq, >> struct fc_frame *fp, void *arg) >> switch (fc_frame_payload_op(fp)) { >> case ELS_LS_ACC: >> fsp->recov_retry = 0; >> - fc_fcp_timer_set(fsp, FC_SCSI_REC_TOV); >> + rec_tov = get_fsp_rec_tov(fsp); >> + fc_fcp_timer_set(fsp, rec_tov); >> break; >> case ELS_LS_RJT: >> default: >> >> _______________________________________________ >> devel mailing list >> [email protected] >> http://www.open-fcoe.org/mailman/listinfo/devel _______________________________________________ devel mailing list [email protected] http://www.open-fcoe.org/mailman/listinfo/devel
