Mike Christie wrote:
> Hey Hannes,
> 
> This will not fix any hangs after the scsi eh or iscsi eh has fired, but 
> I think this patch will help prevent the scsi eh from firing when we do 
> not need it to, like you have seen in some bugzillas. The patch was made 
> over my iscsi tree. It should also apply to scsi-rc-fixes with the 
> patches I sent the other day.
> 
> I modified our command timedout handler so if a command has made some 
> progress since the last timeout or if it is just getting started (it has 
> been put on the wire but we have not yet got anything for it), then we 
> will ask for some more time to run it.
> 
> This is helping here for these problems:
> 1. sending more IO than the disk/target can handle
> 2. using a shorter scsi cmd timeout with a slower link
> 

Attached is an updated patch that should better handle larger writes. If 
we have successfully sent IO to the network layer (or to the LLD in 
cxgb3i's case) and the command times out, then we will give the response 
or r2t more time to reach us. If on the next timeout we still have not 
received anything, then we will let the scsi eh run.

--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"open-iscsi" group.
To post to this group, send email to open-iscsi@googlegroups.com
To unsubscribe from this group, send email to 
open-iscsi+unsubscr...@googlegroups.com
For more options, visit this group at http://groups.google.com/group/open-iscsi
-~----------~----~----~----~------~----~------~--~---

From 23acb13e1290eb0e0c08287fea1e396a17cd2167 Mon Sep 17 00:00:00 2001
From: Mike Christie <micha...@cs.wisc.edu>
Date: Mon, 18 May 2009 19:59:17 -0500
Subject: [PATCH 4/5] libiscsi: reset command timer if iscsi task is making progress

This patch has the iscsi eh cmd time out handler ask for more
time if the command has completed a pdu within the command timer.
It also makes sure that we check the transport and that the
transport checks do not accidentally reset the command timer
if the command really does need to be unjammed via the scsi eh.

Signed-off-by: Mike Christie <micha...@cs.wisc.edu>
---
 drivers/scsi/libiscsi.c     |   52 +++++++++++++++++++++++++++++++++---------
 drivers/scsi/libiscsi_tcp.c |    6 +++-
 include/scsi/libiscsi.h     |    4 +++
 3 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 59908ae..4cc3184 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1361,6 +1361,9 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn,
 	task->state = ISCSI_TASK_PENDING;
 	task->conn = conn;
 	task->sc = sc;
+	task->have_checked_conn = 0;
+	task->last_timeout = jiffies;
+	task->last_recv = jiffies;
 	INIT_LIST_HEAD(&task->running);
 	return task;
 }
@@ -1716,17 +1719,18 @@ static int iscsi_has_ping_timed_out(struct iscsi_conn *conn)
 		return 0;
 }
 
-static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
+static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 {
+	enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED;
+	struct iscsi_task *task = NULL;
 	struct iscsi_cls_session *cls_session;
 	struct iscsi_session *session;
 	struct iscsi_conn *conn;
-	enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED;
 
-	cls_session = starget_to_session(scsi_target(scmd->device));
+	cls_session = starget_to_session(scsi_target(sc->device));
 	session = cls_session->dd_data;
 
-	ISCSI_DBG_SESSION(session, "scsi cmd %p timedout\n", scmd);
+	ISCSI_DBG_SESSION(session, "scsi cmd %p timedout\n", sc);
 
 	spin_lock(&session->lock);
 	if (session->state != ISCSI_STATE_LOGGED_IN) {
@@ -1745,6 +1749,23 @@ static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
 		goto done;
 	}
 
+	task = (struct iscsi_task *)sc->SCp.ptr;
+	if (!task)
+		goto done;
+	/*
+	 * If we have processed a PDU for the command since the last
+	 * timeout then ask for more time.
+	 */
+	if (time_after_eq(task->last_recv, task->last_timeout)) {
+		ISCSI_DBG_CONN(conn, "Command making progress. Asking "
+			       "scsi-ml for more time to complete. "
+			       "Last data recv at %lu. Last timeout was at "
+			       "%lu\n.", task->last_recv, task->last_timeout);
+		task->have_checked_conn = 0;
+		rc = BLK_EH_RESET_TIMER;
+		goto done;
+	}
+
 	if (!conn->recv_timeout && !conn->ping_timeout)
 		goto done;
 	/*
@@ -1755,20 +1776,29 @@ static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
 		rc = BLK_EH_RESET_TIMER;
 		goto done;
 	}
+
+	/* Assumes nop timeout is shorter than scsi cmd timeout */
+	if (task->have_checked_conn)
+		goto done;
+
 	/*
-	 * if we are about to check the transport then give the command
-	 * more time
+	 * Checking the transport already or nop from a cmd timeout still
+	 * running
 	 */
-	if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ),
-			   jiffies)) {
+	if (conn->ping_task) {
+		task->have_checked_conn = 1;
 		rc = BLK_EH_RESET_TIMER;
 		goto done;
 	}
 
-	/* if in the middle of checking the transport then give us more time */
-	if (conn->ping_task)
-		rc = BLK_EH_RESET_TIMER;
+	/* Make sure there is a transport check done */
+	iscsi_send_nopout(conn, NULL);
+	task->have_checked_conn = 1;
+	rc = BLK_EH_RESET_TIMER;
+
 done:
+	if (task)
+		task->last_timeout = jiffies;
 	spin_unlock(&session->lock);
 	ISCSI_DBG_SESSION(session, "return %s\n", rc == BLK_EH_RESET_TIMER ?
 			  "timer reset" : "nh");
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index 2bc0709..0c3bd5c 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -686,6 +686,7 @@ iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
 				     "offset=%d, datalen=%d)\n",
 				      tcp_task->data_offset,
 				      tcp_conn->in.datalen);
+			task->last_recv = jiffies;
 			rc = iscsi_segment_seek_sg(&tcp_conn->in.segment,
 						   sdb->table.sgl,
 						   sdb->table.nents,
@@ -713,9 +714,10 @@ iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
 			rc = ISCSI_ERR_BAD_ITT;
 		else if (ahslen)
 			rc = ISCSI_ERR_AHSLEN;
-		else if (task->sc->sc_data_direction == DMA_TO_DEVICE)
+		else if (task->sc->sc_data_direction == DMA_TO_DEVICE) {
+			task->last_recv = jiffies;
 			rc = iscsi_tcp_r2t_rsp(conn, task);
-		else
+		} else
 			rc = ISCSI_ERR_PROTO;
 		spin_unlock(&conn->session->lock);
 		break;
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 196525c..714c34a 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -125,6 +125,10 @@ struct iscsi_task {
 	struct scsi_cmnd	*sc;		/* associated SCSI cmd*/
 	struct iscsi_conn	*conn;		/* used connection    */
 
+	/* data processing tracking */
+	unsigned long		last_recv;
+	unsigned long		last_timeout;
+	int			have_checked_conn;
 	/* state set/tested under session->lock */
 	int			state;
 	atomic_t		refcount;
-- 
1.6.0.6

Reply via email to