From: Mike Christie <[EMAIL PROTECTED]>
Ok, here is a tested patch :)
This patch removes the timer_pending call becuase if a timer function
is running timer_pending will not tell us about it. If the timer function
has not grabbed the pkt lock, then fc_io_compl could end up freeing
the pkt. So when the timer function gets the pkt lock the pkt has been
freed.
This patch also cleans up the abort timer and completion code. It is
untested because the abort response handling is busted.
And this patch fixes a regerssion caused by my tag map patch.
Where fc_fcp_complete was doing the right thing and just dropping
the lock, for some dumb reason I had fc_io_compl call
fc_fcp_lock_pkt and fc_fcp_unlock_pkt. The problem with the latter functions
is that they release the refcount, so if another caller came in and
called fc_io_compl they could have ended up freeing the fsp from under
us. And to make it worse when fc_fcp_lock_pkt fails it would not have
grabbed the lock, and the fc_io_compl callers expects that we will have it.
Patch was made over
libfc: remove shared tag map and fix timer usage in fc_fcp.c (take 2)
Signed-off-by: Mike Christie <[EMAIL PROTECTED]>
---
drivers/scsi/libfc/fc_fcp.c | 57 +++++++++++-------------------------------
1 files changed, 15 insertions(+), 42 deletions(-)
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index bfb9fc4..85cef96 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -50,6 +50,7 @@ static struct kmem_cache *scsi_pkt_cachep;
#define FC_SRB_ABORTED (1 << 3) /* abort acknowleged */
#define FC_SRB_DISCONTIG (1 << 4) /* non-sequential data recvd */
#define FC_SRB_COMPL (1 << 5) /* fc_io_compl has been run */
+#define FC_SRB_FCP_PROCESSING_TMO (1 << 6) /* timer function processing */
#define FC_SRB_READ (1 << 1)
#define FC_SRB_WRITE (1 << 0)
@@ -783,18 +784,6 @@ static void fc_fcp_complete(struct fc_fcp_pkt *fsp)
struct fc_seq *sp;
u32 f_ctl;
- if (timer_pending(&fsp->timer)) {
- spin_unlock(&fsp->scsi_pkt_lock);
- /*
- * TODO mnc: we should probably make the timer per port or per
- * host, and then have that timer function check for timeouts
- * on all commands. This will prevent the del_timer_sync call
- * in the main IO path.
- */
- del_timer_sync(&fsp->timer);
- spin_lock(&fsp->scsi_pkt_lock);
- }
-
/*
* Test for transport underrun, independent of response underrun status.
*/
@@ -1049,13 +1038,11 @@ static int fc_fcp_pkt_abort(struct fc_lport *lp, struct
fc_fcp_pkt *fsp)
} else if (fsp->state & FC_SRB_ABORTED) {
FC_DBG("target abort cmd passed\n");
rc = SUCCESS;
- }
- if (timer_pending(&fsp->timer)) {
- spin_unlock(&fsp->scsi_pkt_lock);
- del_timer_sync(&fsp->timer);
- spin_lock(&fsp->scsi_pkt_lock);
+ fsp->status_code = FC_CMD_ABORTED;
+ fc_io_compl(fsp);
}
+
return rc;
}
@@ -1251,6 +1238,7 @@ static void fc_fcp_timeout(unsigned long data)
if (fsp->state & FC_SRB_COMPL)
goto unlock;
+ fsp->state |= FC_SRB_FCP_PROCESSING_TMO;
cdb_op = fsp->cdb_cmd.fc_cdb[0];
@@ -1264,6 +1252,8 @@ static void fc_fcp_timeout(unsigned long data)
else if (cdb_op == READ_10 || cdb_op == READ_6 ||
cdb_op == WRITE_10 || cdb_op == WRITE_6)
fc_timeout_error(fsp);
+
+ fsp->state &= ~FC_SRB_FCP_PROCESSING_TMO;
unlock:
fc_fcp_unlock_pkt(fsp);
}
@@ -1486,8 +1476,8 @@ static void fc_fcp_rec_error(struct fc_fcp_pkt *fsp,
struct fc_frame *fp)
break;
default:
- FC_DBG("REC fid %x error unexpected error %d\n",
- fsp->rport->port_id, error);
+ FC_DBG("REC %p fid %x error unexpected error %d\n",
+ fsp, fsp->rport->port_id, error);
fsp->status_code = FC_CMD_PLOGO;
/* fall through */
@@ -1496,8 +1486,9 @@ static void fc_fcp_rec_error(struct fc_fcp_pkt *fsp,
struct fc_frame *fp)
* Assume REC or LS_ACC was lost.
* The exchange manager will have aborted REC, so retry.
*/
- FC_DBG("REC fid %x error error %d\n",
- fsp->rport->port_id, error);
+ FC_DBG("REC fid %x error error %d retry %d/%d\n",
+ fsp->rport->port_id, error, fsp->recov_retry,
+ FC_MAX_RECOV_RETRY);
if (fsp->recov_retry++ < FC_MAX_RECOV_RETRY)
fc_fcp_rec(fsp);
else
@@ -1793,12 +1784,10 @@ static void fc_io_compl(struct fc_fcp_pkt *sp)
unsigned long flags;
sp->state |= FC_SRB_COMPL;
- if (timer_pending(&sp->timer)) {
- fc_fcp_unlock_pkt(sp);
+ if (!(sp->state & FC_SRB_FCP_PROCESSING_TMO)) {
+ spin_unlock(&sp->scsi_pkt_lock);
del_timer_sync(&sp->timer);
- if (fc_fcp_lock_pkt(sp))
- /* A TMF or relogin killed the command from under us */
- return;
+ spin_lock(&sp->scsi_pkt_lock);
}
lp = sp->lp;
@@ -1933,12 +1922,6 @@ int fc_eh_abort(struct scsi_cmnd *sc_cmd)
fc_fcp_pkt_hold(sp);
spin_unlock_irqrestore(lp->host->host_lock, flags);
- /*
- * if there is our timer pending then
- * delete that first
- */
- del_timer_sync(&sp->timer);
-
if (fc_fcp_lock_pkt(sp)) {
/* completed while we were waiting for timer to be deleted */
rc = SUCCESS;
@@ -1947,16 +1930,6 @@ int fc_eh_abort(struct scsi_cmnd *sc_cmd)
sp->state |= FC_SRB_ABORT_PENDING;
rc = fc_fcp_pkt_abort(lp, sp);
- /*
- * if the abort failed the command could still be executing,
- * and so we should not clean up just yet. Since we are returning
- * failed, the device reset handler will figure things out.
- */
- if (rc == SUCCESS) {
- sp->state = FC_SRB_FREE;
- /* release ref from initial setup in queuecommand */
- fc_fcp_pkt_release(sp);
- }
fc_fcp_unlock_pkt(sp);
release_pkt:
--
1.5.4.1
_______________________________________________
devel mailing list
[email protected]
http://www.open-fcoe.org/mailman/listinfo/devel