On 03/01/2010 08:53 PM, Mike Christie wrote:
On 03/01/2010 12:06 PM, bet wrote:
1. Based on my timeouts I would think that my session would time out

Yes. It should timeout about 15 secs after you see
 > Mar 1 07:14:27 bentCluster-1 kernel: connection4:0: ping timeout of
 > 5 secs expired, recv timeout 5, last rx 4884304, last ping 4889304,
 > now 4894304

You might be hitting a bug where the network layer gets stuck trying to
send data. I attached a patch that should fix the problem.


It looks like we have two bugs.

1. We can get stuck in the network code.
2. There is a race where the session->state can get reset due to the xmit thread throwing an error after we have set the session->state but before we have set the stop_stage.

The attached patch for RHEL 5.5 should fix them all.

--
You received this message because you are subscribed to the Google Groups 
"open-iscsi" group.
To post to this group, send email to open-is...@googlegroups.com.
To unsubscribe from this group, send email to 
open-iscsi+unsubscr...@googlegroups.com.
For more options, visit this group at 
http://groups.google.com/group/open-iscsi?hl=en.

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 5c39369..2c908ce 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -254,8 +254,6 @@ static int iscsi_sw_tcp_xmit_segment(struct iscsi_tcp_conn 
*tcp_conn,
 
                if (r < 0) {
                        iscsi_tcp_segment_unmap(segment);
-                       if (copied || r == -EAGAIN)
-                               break;
                        return r;
                }
                copied += r;
@@ -276,11 +274,17 @@ static int iscsi_sw_tcp_xmit(struct iscsi_conn *conn)
 
        while (1) {
                rc = iscsi_sw_tcp_xmit_segment(tcp_conn, segment);
-               if (rc < 0) {
+               /*
+                * We may not have been able to send data because the conn
+                * is getting stopped. libiscsi will know so propogate err
+                * for it to do the right thing.
+                */
+               if (rc == -EAGAIN)
+                       return rc;
+               else if (rc < 0) {
                        rc = ISCSI_ERR_XMIT_FAILED;
                        goto error;
-               }
-               if (rc == 0)
+               } else if (rc == 0)
                        break;
 
                consumed += rc;
@@ -561,9 +565,10 @@ static void iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn 
*cls_conn, int flag)
        struct iscsi_conn *conn = cls_conn->dd_data;
        struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
        struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
+       struct socket *sock = tcp_sw_conn->sock;
 
        /* userspace may have goofed up and not bound us */
-       if (!tcp_sw_conn->sock)
+       if (!sock)
                return;
        /*
         * Make sure our recv side is stopped.
@@ -574,6 +579,11 @@ static void iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn 
*cls_conn, int flag)
        set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
        write_unlock_bh(&tcp_sw_conn->sock->sk->sk_callback_lock);
 
+       if (sock->sk->sk_sleep && waitqueue_active(sock->sk->sk_sleep)) {
+               sock->sk->sk_err = EIO;
+               wake_up_interruptible(sock->sk->sk_sleep);
+       }
+
        iscsi2_conn_stop(cls_conn, flag);
        iscsi_sw_tcp_release_conn(conn);
 }
diff --git a/drivers/scsi/libiscsi2.c b/drivers/scsi/libiscsi2.c
index 61abdf9..262617e 100644
--- a/drivers/scsi/libiscsi2.c
+++ b/drivers/scsi/libiscsi2.c
@@ -2657,14 +2657,15 @@ static void iscsi_start_session_recovery(struct 
iscsi_session *session,
                session->state = ISCSI_STATE_TERMINATE;
        else if (conn->stop_stage != STOP_CONN_RECOVER)
                session->state = ISCSI_STATE_IN_RECOVERY;
+
+       old_stop_stage = conn->stop_stage;
+       conn->stop_stage = flag;
        spin_unlock_bh(&session->lock);
 
        del_timer_sync(&conn->transport_timer);
        iscsi2_suspend_tx(conn);
 
        spin_lock_bh(&session->lock);
-       old_stop_stage = conn->stop_stage;
-       conn->stop_stage = flag;
        conn->c_stage = ISCSI_CONN_STOPPED;
        spin_unlock_bh(&session->lock);
 

Reply via email to