On 08/04/2009 01:12 PM, Erez Zilber wrote:
> On Tue, Aug 4, 2009 at 8:17 PM, Mike Christie<[email protected]> wrote:
>> Erez Zilber wrote:
>>> I'm running with open-iscsi.git HEAD + the check suspend bit patch +
>>> the wake xmit on error patch. If I disconnect the cable on the
>>> initiator side (even while not running IO), I see that after sending
>>> the signal, the iscsi_q_XX thread reaches 100% cpu. I ran it over
>>> several 1GB/ 10 GB drivers and got the same results.
>>>
>>> If I remove the wake xmit on error patch, I don't see this behavior.
>>>
>> Shoot, I have been running the xmit wakeup and suspend bit patch here
>> fine. Let me do some more testing.
>>
>> Is this something you always hit? Could you send me the final patch you
>> ended up using?
>
> I see this every time. Note that I'm not running with
> linux-2.6-iscsi.git. I'm using the open-iscsi.git tree + the 2 patches
> that I took without any change (using git-show) from the
> linux-2.6-iscsi.git tree. Which tree did you test it on?
>
> I added some printks to the code and saw that the signal does get sent
> from iscsi_sw_tcp_conn_stop, but I didn't see that (rc == -EINTR || rc
> == -EAGAIN) in iscsi_sw_tcp_xmit (), even when I ran IO on that
> session.
>
Does r in iscsi_sw_tcp_xmit_segment == 0?
If not I think you need a diffferent patch. In one of the patch versions
iscsi_sw_tcp_xmit_segment could return -ENODATA (this is when I had a
check for suspend_tx in there). iscsi_sw_tcp_xmit did not check this and
so I think we can loop.
Could you try the attached patch. It was made over open-iscsi.git for
you. I dropped the suspend bit check in iscsi_sw_tcp_xmit_segment,
because it is not needed. If we end up blocking the signal will wake us.
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups
"open-iscsi" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to
[email protected]
For more options, visit this group at http://groups.google.com/group/open-iscsi
-~----------~----~----~----~------~----~------~--~---
diff --git a/kernel/iscsi_tcp.c b/kernel/iscsi_tcp.c
index bce1594..028b295 100644
--- a/kernel/iscsi_tcp.c
+++ b/kernel/iscsi_tcp.c
@@ -279,7 +279,7 @@ static int iscsi_sw_tcp_xmit(struct iscsi_conn *conn)
* is getting stopped. libiscsi will know so propogate err
* for it to do the right thing.
*/
- if (rc == -EAGAIN)
+ if (rc == -EINTR || rc == -EAGAIN)
return rc;
else if (rc < 0) {
rc = ISCSI_ERR_XMIT_FAILED;
@@ -483,6 +483,23 @@ static int iscsi_sw_tcp_pdu_alloc(struct iscsi_task *task,
uint8_t opcode)
return 0;
}
+static void __iscsi_sw_tcp_prep_xmit_wq(struct work_struct *work)
+{
+ struct iscsi_sw_tcp_conn *tcp_sw_conn =
+ container_of(work, struct iscsi_sw_tcp_conn, prep_work);
+
+ allow_signal(SIGUSR1);
+ tcp_sw_conn->xmit_task = current;
+}
+
+static void iscsi_sw_tcp_prep_xmit_wq(struct iscsi_host *ihost,
+ struct iscsi_sw_tcp_conn *tcp_sw_conn)
+{
+ INIT_WORK(&tcp_sw_conn->prep_work, __iscsi_sw_tcp_prep_xmit_wq);
+ queue_work(ihost->workq, &tcp_sw_conn->prep_work);
+ flush_workqueue(ihost->workq);
+}
+
static struct iscsi_cls_conn *
iscsi_sw_tcp_conn_create(struct iscsi_cls_session *cls_session,
uint32_t conn_idx)
@@ -513,6 +530,8 @@ iscsi_sw_tcp_conn_create(struct iscsi_cls_session
*cls_session,
goto free_tx_tfm;
tcp_conn->rx_hash = &tcp_sw_conn->rx_hash;
+ iscsi_sw_tcp_prep_xmit_wq(shost_priv(conn->session->host),
+ tcp_sw_conn);
return cls_conn;
free_tx_tfm:
@@ -581,6 +600,10 @@ static void iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn
*cls_conn, int flag)
set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
write_unlock_bh(&tcp_sw_conn->sock->sk->sk_callback_lock);
+ set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
+ if (tcp_sw_conn->xmit_task)
+ send_sig(SIGUSR1, tcp_sw_conn->xmit_task, 0);
+
iscsi_conn_stop(cls_conn, flag);
iscsi_sw_tcp_release_conn(conn);
}
diff --git a/kernel/iscsi_tcp.h b/kernel/iscsi_tcp.h
index f9a4044..49393f1 100644
--- a/kernel/iscsi_tcp.h
+++ b/kernel/iscsi_tcp.h
@@ -36,8 +36,9 @@ struct iscsi_sw_tcp_send {
};
struct iscsi_sw_tcp_conn {
- struct iscsi_conn *iscsi_conn;
struct socket *sock;
+ struct task_struct *xmit_task;
+ struct work_struct prep_work;
struct iscsi_sw_tcp_send out;
/* old values for socket callbacks */
diff --git a/kernel/libiscsi.c b/kernel/libiscsi.c
index 73c4231..0ba612e 100644
--- a/kernel/libiscsi.c
+++ b/kernel/libiscsi.c
@@ -1212,6 +1212,9 @@ static int iscsi_xmit_task(struct iscsi_conn *conn)
struct iscsi_task *task = conn->task;
int rc;
+ if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx))
+ return -ENODATA;
+
__iscsi_get_task(task);
spin_unlock_bh(&conn->session->lock);
rc = conn->session->tt->xmit_task(task);
@@ -1261,7 +1264,10 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
int rc = 0;
spin_lock_bh(&conn->session->lock);
- if (unlikely(conn->suspend_tx)) {
+ if (signal_pending(current))
+ flush_signals(current);
+
+ if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
ISCSI_DBG_SESSION(conn->session, "Tx suspended!\n");
spin_unlock_bh(&conn->session->lock);
return -ENODATA;
@@ -1270,7 +1276,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
if (conn->task) {
rc = iscsi_xmit_task(conn);
if (rc)
- goto again;
+ goto done;
}
/*
@@ -1290,7 +1296,7 @@ check_mgmt:
}
rc = iscsi_xmit_task(conn);
if (rc)
- goto again;
+ goto done;
}
/* process pending command queue */
@@ -1311,14 +1317,14 @@ check_mgmt:
list_add_tail(&conn->task->running,
&conn->cmdqueue);
conn->task = NULL;
- goto again;
+ goto done;
} else
fail_scsi_task(conn->task, DID_ABORT);
continue;
}
rc = iscsi_xmit_task(conn);
if (rc)
- goto again;
+ goto done;
/*
* we could continuously get new task requests so
* we need to check the mgmt queue for nops that need to
@@ -1344,16 +1350,14 @@ check_mgmt:
conn->task->state = ISCSI_TASK_RUNNING;
rc = iscsi_xmit_task(conn);
if (rc)
- goto again;
+ goto done;
if (!list_empty(&conn->mgmtqueue))
goto check_mgmt;
}
spin_unlock_bh(&conn->session->lock);
return -ENODATA;
-again:
- if (unlikely(conn->suspend_tx))
- rc = -ENODATA;
+done:
spin_unlock_bh(&conn->session->lock);
return rc;
}