From: Mike Christie <[EMAIL PROTECTED]>
This patch handles frame allocation failures in fc_fcp.c.
Instead of hitting the BUG_ON in fc_fcp_send_data, we drop
the command, wait for it to time out (the SCSI midlayer will
abort it) and decrease can_queue.
I thought about retrying, as is done in other places, but for
iSCSI it seemed we could avoid the problem entirely by sending
fewer commands. That is why the linux-iscsi/cisco driver uses
an extremely low can_queue value like 3.
The idea is that if allocations keep failing and we hit the worst
case, where we can rely only on our reserves, then can_queue will
eventually equal the size of those reserves.
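
To make the back-off concrete, here is a minimal standalone C
sketch of the same halving-with-floor math the patch uses in
fc_fcp_reduce_can_queue(); the starting depth of 256 is just an
example value, not something taken from the driver:

#include <stdio.h>

/* Same back-off step as fc_fcp_reduce_can_queue(): halve the
 * queue depth on an allocation failure, with a floor of 1. */
static int reduce_can_queue(int can_queue)
{
	can_queue >>= 1;
	if (!can_queue)
		can_queue = 1;
	return can_queue;
}

int main(void)
{
	int can_queue = 256;	/* hypothetical initial host depth */

	while (can_queue > 1) {
		can_queue = reduce_can_queue(can_queue);
		printf("allocation failure -> can_queue now %d\n",
		       can_queue);
	}
	return 0;
}

Repeated failures converge geometrically, so after about
log2(initial depth) failures we are down to the floor and, in
practice, running on the reserves described above.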
Signed-off-by: Mike Christie <[EMAIL PROTECTED]>
---
drivers/scsi/libfc/fc_fcp.c | 78 +++++++++++++++++++++++++++++++++++++------
1 files changed, 67 insertions(+), 11 deletions(-)
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index 8acf95b..a050dd4 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -49,6 +49,7 @@ static struct kmem_cache *scsi_pkt_cachep;
#define FC_SRB_DISCONTIG (1 << 4) /* non-sequential data recvd */
#define FC_SRB_COMPL (1 << 5) /* fc_io_compl has been run */
#define FC_SRB_FCP_PROCESSING_TMO (1 << 6) /* timer function processing */
+#define FC_SRB_NOMEM (1 << 7) /* dropped due to out of memory */
#define FC_SRB_READ (1 << 1)
#define FC_SRB_WRITE (1 << 0)
@@ -128,6 +129,7 @@ struct fc_fcp_pkt {
struct fc_fcp_internal {
mempool_t *scsi_pkt_pool;
struct list_head scsi_pkt_queue;
+ u8 throttled;
};
#define fc_get_scsi_internal(x) ((struct fc_fcp_internal *)(x)->scsi_priv)
@@ -426,9 +428,9 @@ crc_err:
* Send SCSI data to target.
* Called after receiving a Transfer Ready data descriptor.
*/
-static void fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *sp,
- size_t offset, size_t len,
- struct fc_frame *oldfp, int sg_supp)
+static int fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *sp,
+ size_t offset, size_t len,
+ struct fc_frame *oldfp, int sg_supp)
{
struct scsi_cmnd *sc;
struct scatterlist *sg;
@@ -454,7 +456,7 @@ static void fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *sp,
len, offset);
}
fc_fcp_send_abort(fsp);
- return;
+ return 0;
} else if (offset != fsp->xfer_len) {
/*
* Out of Order Data Request - no problem, but unexpected.
@@ -527,12 +529,16 @@ static void fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *sp,
using_sg = 0;
if (using_sg) {
fp = _fc_frame_alloc(lp, 0);
+ if (!fp)
+ return -ENOMEM;
} else {
fp = fc_frame_alloc(lp, tlen);
+ if (!fp)
+ return -ENOMEM;
+
data = (void *)(fr_hdr(fp)) +
sizeof(struct fc_frame_header);
}
- BUG_ON(!fp);
fc_frame_setup(fp, FC_RCTL_DD_SOL_DATA, FC_TYPE_FCP);
fc_frame_set_offset(fp, frame_offset);
}
@@ -590,10 +596,11 @@ static void fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *sp,
if (error) {
WARN_ON(1); /* send error should be rare */
fc_fcp_retry_cmd(fsp);
- return;
+ return 0;
}
}
fsp->xfer_len += len; /* premature count? */
+ return 0;
}
static void fc_fcp_abts_resp(struct fc_fcp_pkt *fsp, struct fc_frame_header *fh)
@@ -615,6 +622,38 @@ static void fc_fcp_abts_resp(struct fc_fcp_pkt *fsp, struct fc_frame_header *fh)
}
/*
+ * fc_fcp_reduce_can_queue - drop can_queue
+ * @lp: lport to drop queueing for
+ *
+ * If we are getting memory allocation failures, then we may
+ * be trying to execute too many commands. We let the running
+ * commands complete or time out, then try again with a reduced
+ * can_queue. Eventually we will reach the point where we are
+ * running entirely on our reserved structures.
+ */
+static void fc_fcp_reduce_can_queue(struct fc_lport *lp)
+{
+ struct fc_fcp_internal *si = fc_get_scsi_internal(lp);
+ unsigned long flags;
+ int can_queue;
+
+ spin_lock_irqsave(lp->host->host_lock, flags);
+ if (si->throttled)
+ goto done;
+ si->throttled = 1;
+
+ can_queue = lp->host->can_queue;
+ can_queue >>= 1;
+ if (!can_queue)
+ can_queue = 1;
+ lp->host->can_queue = can_queue;
+ shost_printk(KERN_ERR, lp->host, "Could not allocate frame.\n"
+ "Reducing can_queue to %d.\n", can_queue);
+done:
+ spin_unlock_irqrestore(lp->host->host_lock, flags);
+}
+
+/*
* exch mgr calls this routine to process scsi
* exchanges.
*
@@ -629,6 +668,7 @@ static void fc_fcp_recv(struct fc_seq *sp, struct fc_frame *fp, void *arg)
struct fc_frame_header *fh;
struct fc_data_desc *dd;
u8 r_ctl;
+ int rc = 0;
if (IS_ERR(fp))
goto errout;
@@ -660,11 +700,14 @@ static void fc_fcp_recv(struct fc_seq *sp, struct fc_frame *fp, void *arg)
dd = fc_frame_payload_get(fp, sizeof(*dd));
WARN_ON(!dd);
- fc_fcp_send_data(fsp, sp,
- (size_t) ntohl(dd->dd_offset),
- (size_t) ntohl(dd->dd_len), fp,
- lp->capabilities & TRANS_C_SG);
- lp->tt.seq_set_rec_data(sp, fsp->xfer_len);
+ rc = fc_fcp_send_data(fsp, sp,
+ (size_t) ntohl(dd->dd_offset),
+ (size_t) ntohl(dd->dd_len), fp,
+ lp->capabilities & TRANS_C_SG);
+ if (!rc)
+ lp->tt.seq_set_rec_data(sp, fsp->xfer_len);
+ else if (rc == -ENOMEM)
+ fsp->state |= FC_SRB_NOMEM;
} else if (r_ctl == FC_RCTL_DD_SOL_DATA) {
/*
* received a DATA frame
@@ -687,6 +730,8 @@ out:
errout:
if (IS_ERR(fp))
fc_fcp_error(fsp, fp);
+ else if (rc == -ENOMEM)
+ fc_fcp_reduce_can_queue(lp);
}
static void fc_fcp_resp(struct fc_fcp_pkt *fsp, struct fc_frame *fp)
@@ -1768,6 +1813,7 @@ EXPORT_SYMBOL(fc_queuecommand);
*/
static void fc_io_compl(struct fc_fcp_pkt *sp)
{
+ struct fc_fcp_internal *si;
struct scsi_cmnd *sc_cmd;
struct fc_lport *lp;
unsigned long flags;
@@ -1780,12 +1826,22 @@ static void fc_io_compl(struct fc_fcp_pkt *sp)
}
lp = sp->lp;
+ si = fc_get_scsi_internal(lp);
spin_lock_irqsave(lp->host->host_lock, flags);
if (!sp->cmd) {
spin_unlock_irqrestore(lp->host->host_lock, flags);
return;
}
+ /*
+ * If a command timed out while we were throttling IO and it is
+ * now being cleaned up, we are about to try again, so clear the
+ * throttled flag in case we get more timeouts.
+ */
+ if (si->throttled && sp->state & FC_SRB_NOMEM)
+ si->throttled = 0;
+
sc_cmd = sp->cmd;
sp->cmd = NULL;
--
1.5.4.1