On 03/01/2010 08:53 PM, Mike Christie wrote:
On 03/01/2010 12:06 PM, bet wrote:
1. Based on my timeouts I would think that my session would time out
Yes. It should time out about 15 secs after you see:
> Mar 1 07:14:27 bentCluster-1 kernel: connection4:0: ping timeout of
> 5 secs expired, recv timeout 5, last rx 4884304, last ping 4889304,
> now 4894304
You might be hitting a bug where the network layer gets stuck trying to
send data. I attached a patch that should fix the problem.
It looks like we have two bugs.
1. We can get stuck in the network code.
2. There is a race where the session->state can get reset due to the
xmit thread throwing an error after we have set the session->state but
before we have set the stop_stage.
The attached patch for RHEL 5.5 should fix both.
--
You received this message because you are subscribed to the Google Groups
"open-iscsi" group.
To post to this group, send email to open-is...@googlegroups.com.
To unsubscribe from this group, send email to
open-iscsi+unsubscr...@googlegroups.com.
For more options, visit this group at
http://groups.google.com/group/open-iscsi?hl=en.
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 5c39369..2c908ce 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -254,8 +254,6 @@ static int iscsi_sw_tcp_xmit_segment(struct iscsi_tcp_conn
*tcp_conn,
if (r < 0) {
iscsi_tcp_segment_unmap(segment);
- if (copied || r == -EAGAIN)
- break;
return r;
}
copied += r;
@@ -276,11 +274,17 @@ static int iscsi_sw_tcp_xmit(struct iscsi_conn *conn)
while (1) {
rc = iscsi_sw_tcp_xmit_segment(tcp_conn, segment);
- if (rc < 0) {
+ /*
+ * We may not have been able to send data because the conn
+ * is getting stopped. libiscsi will know so propagate err
+ * for it to do the right thing.
+ */
+ if (rc == -EAGAIN)
+ return rc;
+ else if (rc < 0) {
rc = ISCSI_ERR_XMIT_FAILED;
goto error;
- }
- if (rc == 0)
+ } else if (rc == 0)
break;
consumed += rc;
@@ -561,9 +565,10 @@ static void iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn
*cls_conn, int flag)
struct iscsi_conn *conn = cls_conn->dd_data;
struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
+ struct socket *sock = tcp_sw_conn->sock;
/* userspace may have goofed up and not bound us */
- if (!tcp_sw_conn->sock)
+ if (!sock)
return;
/*
* Make sure our recv side is stopped.
@@ -574,6 +579,11 @@ static void iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn
*cls_conn, int flag)
set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
write_unlock_bh(&tcp_sw_conn->sock->sk->sk_callback_lock);
+ if (sock->sk->sk_sleep && waitqueue_active(sock->sk->sk_sleep)) {
+ sock->sk->sk_err = EIO;
+ wake_up_interruptible(sock->sk->sk_sleep);
+ }
+
iscsi2_conn_stop(cls_conn, flag);
iscsi_sw_tcp_release_conn(conn);
}
diff --git a/drivers/scsi/libiscsi2.c b/drivers/scsi/libiscsi2.c
index 61abdf9..262617e 100644
--- a/drivers/scsi/libiscsi2.c
+++ b/drivers/scsi/libiscsi2.c
@@ -2657,14 +2657,15 @@ static void iscsi_start_session_recovery(struct
iscsi_session *session,
session->state = ISCSI_STATE_TERMINATE;
else if (conn->stop_stage != STOP_CONN_RECOVER)
session->state = ISCSI_STATE_IN_RECOVERY;
+
+ old_stop_stage = conn->stop_stage;
+ conn->stop_stage = flag;
spin_unlock_bh(&session->lock);
del_timer_sync(&conn->transport_timer);
iscsi2_suspend_tx(conn);
spin_lock_bh(&session->lock);
- old_stop_stage = conn->stop_stage;
- conn->stop_stage = flag;
conn->c_stage = ISCSI_CONN_STOPPED;
spin_unlock_bh(&session->lock);