On 04/04/2008 03:54 PM, supreeth wrote:
throughput using Intel 1Gb ethernet cards (e1000) and a significant
increase
in throughput using the Intel 10Gb cards.
For writes I am getting 1000MB/s (netperf reports 1100), but for reads I
am getting only 600 MB/s. On the write side we use sendpage which is
zero copy. For reads though, we use memcpy. perf traces are showing we
are in that memcpy a lot. So I thought these patches might help since
Supreeth had also mentioned a significant increase in throughput.
The attached patch ports Supreeth's patches to linus's tree.
For reads I just do something like
fio --filename=/dev/sdXYZ --direct=1 --rw=randread --bs=1m --size=10G
--numjobs=4 --runtime=10 --group_reporting --name=file1
For writes I do:
fio --filename=/dev/XYZ --direct=1 --rw=randwrite --bs=1m --size=10G
--numjobs=4 --runtime=10 --group_reporting --name=file1
The iscsi target disks are memory backed disks, so not actually going to
real spinning disks :)
Also in the attached patch is a change to the r2t code which speeds up
writes when the IO size requires R2Ts.
With Linus's tree I had to use the "noop" io scheduler, turn off iptables
("/etc/init.d/iptables stop" on fedora/RHEL systems), turn off
irqbalance, turn off cpuspeed, and then play around with the
/sys/block/sdXYZ/queue/rq_affinity setting. Also it sometimes helped
when the MaxRecvDataSegmentLength and MaxXmitDataSegmentLength were
larger (128 - 256K) and then the /sys/block/sdXYZ/queue/max_sectors_kb
matched them.
And make sure ioatdma is loaded and that /sys/class/dma/ has some dma
channels.
If you have a fast system with ioatdma please try it out.
--
You received this message because you are subscribed to the Google Groups
"open-iscsi" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/open-iscsi?hl=en.
diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c
index d2ad3d6..42b6861 100644
--- a/drivers/scsi/cxgbi/libcxgbi.c
+++ b/drivers/scsi/cxgbi/libcxgbi.c
@@ -2158,11 +2158,10 @@ int cxgbi_set_conn_param(struct iscsi_cls_conn *cls_conn,
enum iscsi_param param, char *buf, int buflen)
{
struct iscsi_conn *conn = cls_conn->dd_data;
- struct iscsi_session *session = conn->session;
struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
struct cxgbi_conn *cconn = tcp_conn->dd_data;
struct cxgbi_sock *csk = cconn->cep->csk;
- int value, err = 0;
+ int err = 0;
log_debug(1 << CXGBI_DBG_ISCSI,
"cls_conn 0x%p, param %d, buf(%d) %s.\n",
@@ -2183,16 +2182,6 @@ int cxgbi_set_conn_param(struct iscsi_cls_conn *cls_conn,
conn->hdrdgst_en,
conn->datadgst_en, 0);
break;
- case ISCSI_PARAM_MAX_R2T:
- sscanf(buf, "%d", &value);
- if (value <= 0 || !is_power_of_2(value))
- return -EINVAL;
- if (session->max_r2t == value)
- break;
- iscsi_tcp_r2tpool_free(session);
- err = iscsi_set_param(cls_conn, param, buf, buflen);
- if (!err && iscsi_tcp_r2tpool_alloc(session))
- return -ENOMEM;
case ISCSI_PARAM_MAX_RECV_DLENGTH:
err = iscsi_set_param(cls_conn, param, buf, buflen);
if (!err)
@@ -2325,7 +2314,6 @@ struct iscsi_cls_session *cxgbi_create_session(struct iscsi_endpoint *ep,
struct cxgbi_hba *chba;
struct Scsi_Host *shost;
struct iscsi_cls_session *cls_session;
- struct iscsi_session *session;
if (!ep) {
pr_err("missing endpoint.\n");
@@ -2346,17 +2334,9 @@ struct iscsi_cls_session *cxgbi_create_session(struct iscsi_endpoint *ep,
if (!cls_session)
return NULL;
- session = cls_session->dd_data;
- if (iscsi_tcp_r2tpool_alloc(session))
- goto remove_session;
-
log_debug(1 << CXGBI_DBG_ISCSI,
"ep 0x%p, cls sess 0x%p.\n", ep, cls_session);
return cls_session;
-
-remove_session:
- iscsi_session_teardown(cls_session);
- return NULL;
}
EXPORT_SYMBOL_GPL(cxgbi_create_session);
@@ -2365,7 +2345,6 @@ void cxgbi_destroy_session(struct iscsi_cls_session *cls_session)
log_debug(1 << CXGBI_DBG_ISCSI,
"cls sess 0x%p.\n", cls_session);
- iscsi_tcp_r2tpool_free(cls_session->dd_data);
iscsi_session_teardown(cls_session);
}
EXPORT_SYMBOL_GPL(cxgbi_destroy_session);
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index fec47de..9edb244 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -121,12 +121,46 @@ static inline int iscsi_sw_sk_state_check(struct sock *sk)
return 0;
}
+static void iscsi_tcp_dma_cleanup(struct iscsi_tcp_conn *tcp_conn,
+ struct sock *sk)
+{
+ struct sk_buff *skb;
+ dma_cookie_t done, used, skb_cookie = 0;
+
+ if (!tcp_conn->dma_chan || tcp_conn->dma_cookie <= 0)
+ return;
+
+ dma_async_memcpy_issue_pending(tcp_conn->dma_chan);
+
+ while (dma_async_memcpy_complete(tcp_conn->dma_chan,
+ tcp_conn->dma_cookie, &done, &used) ==
+ DMA_IN_PROGRESS) {
+ /* do partial cleanup of sk_async_wait_queue up to done*/
+ do {
+ skb = skb_peek(&sk->sk_async_wait_queue);
+ if (NULL == skb || done < skb->dma_cookie)
+ break;
+ else {
+ skb_cookie = skb->dma_cookie;
+ __skb_dequeue(&sk->sk_async_wait_queue);
+ kfree_skb(skb);
+ }
+
+ } while (skb_cookie != done);
+ }
+
+ /* Safe to free early-copied skbs now */
+ __skb_queue_purge(&sk->sk_async_wait_queue);
+ tcp_conn->dma_cookie = 0;
+}
+
static void iscsi_sw_tcp_data_ready(struct sock *sk, int flag)
{
struct iscsi_conn *conn = sk->sk_user_data;
struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
read_descriptor_t rd_desc;
+ iscsi_tcp_set_dma_chan(tcp_conn);
read_lock(&sk->sk_callback_lock);
/*
@@ -146,6 +180,8 @@ static void iscsi_sw_tcp_data_ready(struct sock *sk, int flag)
/* If we had to (atomically) map a highmem page,
* unmap it now. */
iscsi_tcp_segment_unmap(&tcp_conn->in.segment);
+ /* Leak? If we stop the conn mid segment then could this leak */
+// iscsi_tcp_dma_cleanup(tcp_conn, sk);
}
static void iscsi_sw_tcp_state_change(struct sock *sk)
@@ -698,6 +734,10 @@ iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session,
sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
sk->sk_allocation = GFP_ATOMIC;
+ iscsi_tcp_init_dma_sock(sk);
+
+ tcp_conn->sk = sk;
+
iscsi_sw_tcp_conn_set_callbacks(conn);
tcp_sw_conn->sendpage = tcp_sw_conn->sock->ops->sendpage;
/*
@@ -716,10 +756,8 @@ static int iscsi_sw_tcp_conn_set_param(struct iscsi_cls_conn *cls_conn,
int buflen)
{
struct iscsi_conn *conn = cls_conn->dd_data;
- struct iscsi_session *session = conn->session;
struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
- int value;
switch(param) {
case ISCSI_PARAM_HDRDGST_EN:
@@ -730,17 +768,6 @@ static int iscsi_sw_tcp_conn_set_param(struct iscsi_cls_conn *cls_conn,
tcp_sw_conn->sendpage = conn->datadgst_en ?
sock_no_sendpage : tcp_sw_conn->sock->ops->sendpage;
break;
- case ISCSI_PARAM_MAX_R2T:
- sscanf(buf, "%d", &value);
- if (value <= 0 || !is_power_of_2(value))
- return -EINVAL;
- if (session->max_r2t == value)
- break;
- iscsi_tcp_r2tpool_free(session);
- iscsi_set_param(cls_conn, param, buf, buflen);
- if (iscsi_tcp_r2tpool_alloc(session))
- return -ENOMEM;
- break;
default:
return iscsi_set_param(cls_conn, param, buf, buflen);
}
@@ -827,12 +854,8 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
session = cls_session->dd_data;
shost->can_queue = session->scsi_cmds_max;
- if (iscsi_tcp_r2tpool_alloc(session))
- goto remove_session;
return cls_session;
-remove_session:
- iscsi_session_teardown(cls_session);
remove_host:
iscsi_host_remove(shost);
free_host:
@@ -844,7 +867,6 @@ static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session)
{
struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
- iscsi_tcp_r2tpool_free(cls_session->dd_data);
iscsi_session_teardown(cls_session);
iscsi_host_remove(shost);
@@ -959,11 +981,13 @@ static int __init iscsi_sw_tcp_init(void)
if (!iscsi_sw_tcp_scsi_transport)
return -ENODEV;
+ net_dmaengine_get();
return 0;
}
static void __exit iscsi_sw_tcp_exit(void)
{
+ net_dmaengine_put();
iscsi_unregister_transport(&iscsi_sw_tcp_transport);
}
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index da8b615..7b4bdd4 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -2780,6 +2780,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
task->itt = cmd_i;
task->state = ISCSI_TASK_FREE;
INIT_LIST_HEAD(&task->running);
+ spin_lock_init(&task->lock);
}
if (!try_module_get(iscsit->owner))
@@ -2884,6 +2885,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
get_order(ISCSI_DEF_MAX_RECV_SEG_LEN));
if (!data)
goto login_task_data_alloc_fail;
+
conn->login_task->data = conn->data = data;
init_timer(&conn->tmf_timer);
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index 8eeb39f..0460ef2 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -37,6 +37,7 @@
#include <linux/kfifo.h>
#include <linux/scatterlist.h>
#include <net/tcp.h>
+#include <net/netdma.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
@@ -439,25 +440,14 @@ iscsi_tcp_data_recv_prep(struct iscsi_tcp_conn *tcp_conn)
void iscsi_tcp_cleanup_task(struct iscsi_task *task)
{
struct iscsi_tcp_task *tcp_task = task->dd_data;
- struct iscsi_r2t_info *r2t;
/* nothing to do for mgmt */
if (!task->sc)
return;
- /* flush task's r2t queues */
- while (kfifo_out(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
- kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t,
- sizeof(void*));
- ISCSI_DBG_TCP(task->conn, "pending r2t dropped\n");
- }
-
- r2t = tcp_task->r2t;
- if (r2t != NULL) {
- kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t,
- sizeof(void*));
- tcp_task->r2t = NULL;
- }
+ /* reset task's r2t */
+ tcp_task->r2t.data_length = 0;
+ tcp_task->r2t.sent = 0;
}
EXPORT_SYMBOL_GPL(iscsi_tcp_cleanup_task);
@@ -515,9 +505,8 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
struct iscsi_tcp_task *tcp_task = task->dd_data;
struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
- struct iscsi_r2t_info *r2t;
+ struct iscsi_r2t_info *r2t = &tcp_task->r2t;
int r2tsn = be32_to_cpu(rhdr->r2tsn);
- int rc;
if (tcp_conn->in.datalen) {
iscsi_conn_printk(KERN_ERR, conn,
@@ -542,21 +531,12 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
return 0;
}
- rc = kfifo_out(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
- if (!rc) {
- iscsi_conn_printk(KERN_ERR, conn, "Could not allocate R2T. "
- "Target has sent more R2Ts than it "
- "negotiated for or driver has has leaked.\n");
- return ISCSI_ERR_PROTO;
- }
-
+ memset(r2t, 0, sizeof(*r2t));
r2t->exp_statsn = rhdr->statsn;
r2t->data_length = be32_to_cpu(rhdr->data_length);
if (r2t->data_length == 0) {
iscsi_conn_printk(KERN_ERR, conn,
"invalid R2T with zero data len\n");
- kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t,
- sizeof(void*));
return ISCSI_ERR_DATALEN;
}
@@ -571,8 +551,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
"invalid R2T with data len %u at offset %u "
"and total length %d\n", r2t->data_length,
r2t->data_offset, scsi_out(task->sc)->length);
- kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t,
- sizeof(void*));
+ r2t->data_length = 0;
return ISCSI_ERR_DATALEN;
}
@@ -581,7 +560,6 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
r2t->sent = 0;
tcp_task->exp_datasn = r2tsn + 1;
- kfifo_in(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
conn->r2t_pdus_cnt++;
iscsi_requeue_task(task);
@@ -709,18 +687,18 @@ iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
break;
case ISCSI_OP_R2T:
- spin_lock(&conn->session->lock);
task = iscsi_itt_to_ctask(conn, hdr->itt);
if (!task)
rc = ISCSI_ERR_BAD_ITT;
else if (ahslen)
rc = ISCSI_ERR_AHSLEN;
else if (task->sc->sc_data_direction == DMA_TO_DEVICE) {
+ spin_lock(&task->lock);
task->last_xfer = jiffies;
rc = iscsi_tcp_r2t_rsp(conn, task);
+ spin_unlock(&task->lock);
} else
rc = ISCSI_ERR_PROTO;
- spin_unlock(&conn->session->lock);
break;
case ISCSI_OP_LOGIN_RSP:
case ISCSI_OP_TEXT_RSP:
@@ -846,6 +824,123 @@ inline int iscsi_tcp_recv_segment_is_hdr(struct iscsi_tcp_conn *tcp_conn)
}
EXPORT_SYMBOL_GPL(iscsi_tcp_recv_segment_is_hdr);
+static unsigned int iscsi_tcp_copy_skb(struct iscsi_tcp_conn *tcp_conn,
+ struct iscsi_segment *segment,
+ struct sk_buff *skb, unsigned int offset,
+ int *status)
+{
+ struct iscsi_conn *conn = tcp_conn->iscsi_conn;
+ struct skb_seq_state seq;
+ unsigned int consumed = 0;
+ int rc = 0;
+
+ skb_prepare_seq_read(skb, offset, skb->len, &seq);
+ while (1) {
+ unsigned int avail;
+ const u8 *ptr;
+
+ avail = skb_seq_read(consumed, &ptr, &seq);
+ if (avail == 0) {
+ ISCSI_DBG_TCP(conn, "no more data avail. Consumed %d\n",
+ consumed);
+ *status = ISCSI_TCP_SKB_DONE;
+ break;
+ }
+ BUG_ON(segment->copied >= segment->size);
+
+ ISCSI_DBG_TCP(conn, "skb %p ptr=%p avail=%u\n", skb, ptr,
+ avail);
+ rc = iscsi_tcp_segment_recv(tcp_conn, segment, ptr, avail);
+ BUG_ON(rc == 0);
+ consumed += rc;
+
+ if (segment->total_copied >= segment->total_size) {
+ *status = ISCSI_TCP_SEGMENT_DONE;
+ break;
+ }
+ }
+
+ skb_abort_seq_read(&seq);
+ return consumed;
+}
+
+static void iscsi_tcp_dma_cleanup(struct iscsi_tcp_conn *tcp_conn,
+ struct sock *sk)
+{
+ struct sk_buff *skb;
+ dma_cookie_t done, used, skb_cookie = 0;
+
+ if (!tcp_conn->dma_chan || tcp_conn->dma_cookie <= 0)
+ return;
+
+ dma_async_memcpy_issue_pending(tcp_conn->dma_chan);
+
+ while (dma_async_memcpy_complete(tcp_conn->dma_chan,
+ tcp_conn->dma_cookie, &done, &used) ==
+ DMA_IN_PROGRESS) {
+ /* do partial cleanup of sk_async_wait_queue up to done*/
+ do {
+ skb = skb_peek(&sk->sk_async_wait_queue);
+ if (NULL == skb || done < skb->dma_cookie)
+ break;
+ else {
+ skb_cookie = skb->dma_cookie;
+ __skb_dequeue(&sk->sk_async_wait_queue);
+ kfree_skb(skb);
+ }
+
+ } while (skb_cookie != done);
+ }
+
+ /* Safe to free early-copied skbs now */
+ __skb_queue_purge(&sk->sk_async_wait_queue);
+ tcp_conn->dma_cookie = 0;
+}
+
+static unsigned int iscsi_tcp_dma_skb(struct iscsi_tcp_conn *tcp_conn,
+ struct iscsi_segment *segment,
+ struct sk_buff *skb, unsigned int offset,
+ int *status)
+{
+ unsigned int copy = 0, copied = 0;
+ dma_cookie_t cookie;
+
+ if (skb->len == offset) {
+ *status = ISCSI_TCP_SKB_DONE;
+ return 0;
+ }
+
+ while (!iscsi_tcp_segment_done(tcp_conn, segment, 1, copy)) {
+ copy = min(skb->len - (offset + copied),
+ segment->size - segment->copied);
+ if (!copy)
+ break;
+
+ ISCSI_DBG_TCP(tcp_conn->iscsi_conn, "dma: copying %u, skb len "
+ "%u, offset %u, copied %u, segment sz %u "
+ "segment copied %u data %p\n", copy, skb->len,
+ offset, copied, segment->size, segment->copied,
+ segment->data);
+ cookie = 0;
+ WARN_ON_ONCE(dma_skb_copy_bits(tcp_conn->dma_chan, skb,
+ offset + copied,
+ segment->data + segment->copied,
+ copy, &cookie));
+
+ if (cookie > 0)
+ tcp_conn->dma_cookie = cookie;
+
+ copied += copy;
+ }
+
+ iscsi_tcp_dma_cleanup(tcp_conn, tcp_conn->sk);
+ if (segment->total_copied >= segment->total_size)
+ *status = ISCSI_TCP_SEGMENT_DONE;
+ else
+ *status = ISCSI_TCP_SKB_DONE;
+ return copied;
+}
+
/**
* iscsi_tcp_recv_skb - Process skb
* @conn: iscsi connection
@@ -861,7 +956,6 @@ int iscsi_tcp_recv_skb(struct iscsi_conn *conn, struct sk_buff *skb,
{
struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
struct iscsi_segment *segment = &tcp_conn->in.segment;
- struct skb_seq_state seq;
unsigned int consumed = 0;
int rc = 0;
@@ -879,39 +973,18 @@ int iscsi_tcp_recv_skb(struct iscsi_conn *conn, struct sk_buff *skb,
return 0;
}
- if (offloaded) {
- segment->total_copied = segment->total_size;
- goto segment_done;
- }
-
- skb_prepare_seq_read(skb, offset, skb->len, &seq);
- while (1) {
- unsigned int avail;
- const u8 *ptr;
-
- avail = skb_seq_read(consumed, &ptr, &seq);
- if (avail == 0) {
- ISCSI_DBG_TCP(conn, "no more data avail. Consumed %d\n",
- consumed);
- *status = ISCSI_TCP_SKB_DONE;
- skb_abort_seq_read(&seq);
+ if (!offloaded) {
+ if (tcp_conn->dma_chan)
+ consumed = iscsi_tcp_dma_skb(tcp_conn, segment, skb,
+ offset, status);
+ else
+ consumed = iscsi_tcp_copy_skb(tcp_conn, segment, skb,
+ offset, status);
+ if (*status == ISCSI_TCP_SKB_DONE)
goto skb_done;
- }
- BUG_ON(segment->copied >= segment->size);
-
- ISCSI_DBG_TCP(conn, "skb %p ptr=%p avail=%u\n", skb, ptr,
- avail);
- rc = iscsi_tcp_segment_recv(tcp_conn, segment, ptr, avail);
- BUG_ON(rc == 0);
- consumed += rc;
-
- if (segment->total_copied >= segment->total_size) {
- skb_abort_seq_read(&seq);
- goto segment_done;
- }
}
-segment_done:
+ segment->total_copied = segment->total_size;
*status = ISCSI_TCP_SEGMENT_DONE;
ISCSI_DBG_TCP(conn, "segment done\n");
rc = segment->done(tcp_conn, segment);
@@ -952,7 +1025,6 @@ int iscsi_tcp_task_init(struct iscsi_task *task)
return conn->session->tt->init_pdu(task, 0, task->data_count);
}
- BUG_ON(kfifo_len(&tcp_task->r2tqueue));
tcp_task->exp_datasn = 0;
/* Prepare PDU, optionally w/ immediate data */
@@ -969,36 +1041,20 @@ EXPORT_SYMBOL_GPL(iscsi_tcp_task_init);
static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
{
- struct iscsi_session *session = task->conn->session;
struct iscsi_tcp_task *tcp_task = task->dd_data;
struct iscsi_r2t_info *r2t = NULL;
if (iscsi_task_has_unsol_data(task))
r2t = &task->unsol_r2t;
else {
- spin_lock_bh(&session->lock);
- if (tcp_task->r2t) {
- r2t = tcp_task->r2t;
- /* Continue with this R2T? */
- if (r2t->data_length <= r2t->sent) {
- ISCSI_DBG_TCP(task->conn,
- " done with r2t %p\n", r2t);
- kfifo_in(&tcp_task->r2tpool.queue,
- (void *)&tcp_task->r2t,
- sizeof(void *));
- tcp_task->r2t = r2t = NULL;
- }
- }
-
- if (r2t == NULL) {
- if (kfifo_out(&tcp_task->r2tqueue,
- (void *)&tcp_task->r2t, sizeof(void *)) !=
- sizeof(void *))
- r2t = NULL;
- else
- r2t = tcp_task->r2t;
- }
- spin_unlock_bh(&session->lock);
+		spin_lock_bh(&task->lock);
+		/* do we have solicited r2t data left to send? */
+		if (tcp_task->r2t.data_length <= tcp_task->r2t.sent)
+			/* r2t is still NULL here; report via tcp_task->r2t */
+			ISCSI_DBG_TCP(task->conn, "No write data %u %u\n",
+				      tcp_task->r2t.data_length,
+				      tcp_task->r2t.sent);
+		else
+			r2t = &tcp_task->r2t;
+		spin_unlock_bh(&task->lock);
}
return r2t;
@@ -1059,7 +1115,9 @@ flush:
return rc;
}
+ spin_lock_bh(&task->lock);
r2t->sent += r2t->data_count;
+ spin_unlock_bh(&task->lock);
goto flush;
}
EXPORT_SYMBOL_GPL(iscsi_tcp_task_xmit);
@@ -1105,67 +1163,6 @@ void iscsi_tcp_conn_teardown(struct iscsi_cls_conn *cls_conn)
}
EXPORT_SYMBOL_GPL(iscsi_tcp_conn_teardown);
-int iscsi_tcp_r2tpool_alloc(struct iscsi_session *session)
-{
- int i;
- int cmd_i;
-
- /*
- * initialize per-task: R2T pool and xmit queue
- */
- for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
- struct iscsi_task *task = session->cmds[cmd_i];
- struct iscsi_tcp_task *tcp_task = task->dd_data;
-
- /*
- * pre-allocated x2 as much r2ts to handle race when
- * target acks DataOut faster than we data_xmit() queues
- * could replenish r2tqueue.
- */
-
- /* R2T pool */
- if (iscsi_pool_init(&tcp_task->r2tpool,
- session->max_r2t * 2, NULL,
- sizeof(struct iscsi_r2t_info))) {
- goto r2t_alloc_fail;
- }
-
- /* R2T xmit queue */
- if (kfifo_alloc(&tcp_task->r2tqueue,
- session->max_r2t * 4 * sizeof(void*), GFP_KERNEL)) {
- iscsi_pool_free(&tcp_task->r2tpool);
- goto r2t_alloc_fail;
- }
- }
-
- return 0;
-
-r2t_alloc_fail:
- for (i = 0; i < cmd_i; i++) {
- struct iscsi_task *task = session->cmds[i];
- struct iscsi_tcp_task *tcp_task = task->dd_data;
-
- kfifo_free(&tcp_task->r2tqueue);
- iscsi_pool_free(&tcp_task->r2tpool);
- }
- return -ENOMEM;
-}
-EXPORT_SYMBOL_GPL(iscsi_tcp_r2tpool_alloc);
-
-void iscsi_tcp_r2tpool_free(struct iscsi_session *session)
-{
- int i;
-
- for (i = 0; i < session->cmds_max; i++) {
- struct iscsi_task *task = session->cmds[i];
- struct iscsi_tcp_task *tcp_task = task->dd_data;
-
- kfifo_free(&tcp_task->r2tqueue);
- iscsi_pool_free(&tcp_task->r2tpool);
- }
-}
-EXPORT_SYMBOL_GPL(iscsi_tcp_r2tpool_free);
-
void iscsi_tcp_conn_get_stats(struct iscsi_cls_conn *cls_conn,
struct iscsi_stats *stats)
{
diff --git a/include/net/netdma.h b/include/net/netdma.h
index 8ba8ce2..3e275c4 100644
--- a/include/net/netdma.h
+++ b/include/net/netdma.h
@@ -27,6 +27,8 @@
int dma_skb_copy_datagram_iovec(struct dma_chan* chan,
struct sk_buff *skb, int offset, struct iovec *to,
size_t len, struct dma_pinned_list *pinned_list);
+int dma_skb_copy_bits(struct dma_chan *chan, struct sk_buff *skb,
+ int offset, void *to, int len, dma_cookie_t *dma_cookie);
#endif /* CONFIG_NET_DMA */
#endif /* NETDMA_H */
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 748382b..59ea47d 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -121,6 +121,7 @@ struct iscsi_task {
unsigned imm_count; /* imm-data (bytes) */
/* offset in unsolicited stream (bytes); */
+ spinlock_t lock; /* protects r2t state, */
struct iscsi_r2t_info unsol_r2t;
char *data; /* mgmt payload */
unsigned data_count;
@@ -297,8 +298,7 @@ struct iscsi_session {
* sequence numbers, *
* session resources: *
* - cmdpool, *
- * - mgmtpool, *
- * - r2tpool */
+ * - mgmtpool, */
int state; /* session state */
int age; /* counts session re-opens */
diff --git a/include/scsi/libiscsi_tcp.h b/include/scsi/libiscsi_tcp.h
index 741ae7e..fa1537f 100644
--- a/include/scsi/libiscsi_tcp.h
+++ b/include/scsi/libiscsi_tcp.h
@@ -21,6 +21,7 @@
#ifndef LIBISCSI_TCP_H
#define LIBISCSI_TCP_H
+#include <linux/dmaengine.h>
#include <scsi/libiscsi.h>
struct iscsi_tcp_conn;
@@ -64,6 +65,8 @@ struct iscsi_tcp_recv {
};
struct iscsi_tcp_conn {
+ struct sock *sk; /* tmp hack for tcp dma */
+
struct iscsi_conn *iscsi_conn;
void *dd_data;
int stop_stage; /* conn_stop() flag: *
@@ -73,14 +76,16 @@ struct iscsi_tcp_conn {
struct iscsi_tcp_recv in; /* TCP receive context */
/* CRC32C (Rx) LLD should set this is they do not offload */
struct hash_desc *rx_hash;
+
+ /* optional dma channel fields */
+ struct dma_chan *dma_chan;
+ dma_cookie_t dma_cookie; /* last DMA cookie returned */
};
struct iscsi_tcp_task {
uint32_t exp_datasn; /* expected target's R2TSN/DataSN */
int data_offset;
- struct iscsi_r2t_info *r2t; /* in progress solict R2T */
- struct iscsi_pool r2tpool;
- struct kfifo r2tqueue;
+ struct iscsi_r2t_info r2t; /* in progress solict R2T */
void *dd_data;
};
@@ -125,9 +130,33 @@ iscsi_tcp_conn_setup(struct iscsi_cls_session *cls_session, int dd_data_size,
extern void iscsi_tcp_conn_teardown(struct iscsi_cls_conn *cls_conn);
/* misc helpers */
-extern int iscsi_tcp_r2tpool_alloc(struct iscsi_session *session);
-extern void iscsi_tcp_r2tpool_free(struct iscsi_session *session);
-
extern void iscsi_tcp_conn_get_stats(struct iscsi_cls_conn *cls_conn,
struct iscsi_stats *stats);
+
+/* DMA Helpers
+ *
+ * These live in a header, so they must be static inline: the original
+ * non-static definitions would be emitted in every translation unit that
+ * includes this file and fail at link time with multiple definitions.
+ */
+#ifdef CONFIG_NET_DMA
+
+/* Prepare the socket queue used to park skbs until their DMA completes. */
+static inline void iscsi_tcp_init_dma_sock(struct sock *sk)
+{
+	skb_queue_head_init(&sk->sk_async_wait_queue);
+}
+
+/* Pick up a DMA_MEMCPY channel; may leave dma_chan NULL if none exists. */
+static inline void iscsi_tcp_set_dma_chan(struct iscsi_tcp_conn *tcp_conn)
+{
+	tcp_conn->dma_chan = dma_find_channel(DMA_MEMCPY);
+}
+
+#else
+
+/* No NET_DMA: stubs do nothing (note: void functions must not return 0). */
+static inline void iscsi_tcp_init_dma_sock(struct sock *sk)
+{
+}
+
+static inline void iscsi_tcp_set_dma_chan(struct iscsi_tcp_conn *tcp_conn)
+{
+}
+#endif /* CONFIG_NET_DMA */
+
#endif /* LIBISCSI_TCP_H */
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
index 25d717e..4964e73 100644
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -29,6 +29,7 @@
#include <linux/socket.h>
#include <net/tcp.h>
#include <net/netdma.h>
+#include "kmap_skb.h"
#define NET_DMA_DEFAULT_COPYBREAK 4096
@@ -128,3 +129,105 @@ end:
fault:
return -EFAULT;
}
+
+/**
+ * dma_skb_copy_bits - copy skb data to a linear buffer via the DMA engine
+ * @chan: dma channel to submit the copies on
+ * @skb: source socket buffer
+ * @offset: offset in @skb to start copying from
+ * @to: destination buffer
+ * @len: number of bytes to copy
+ * @dma_cookie: set to the last cookie issued (0 if nothing was queued)
+ *
+ * Copies are asynchronous: the caller must wait for @dma_cookie to
+ * complete before reading @to. Returns 0 on success, -EFAULT if the
+ * requested range lies outside @skb or a DMA submission fails.
+ */
+int dma_skb_copy_bits(struct dma_chan *chan, struct sk_buff *skb,
+		int offset, void *to, int len, dma_cookie_t *dma_cookie)
+{
+	int start = skb_headlen(skb);
+	struct sk_buff *frag_iter;
+	int i, copy;
+
+	*dma_cookie = 0;
+
+	if (offset > (int)skb->len - len)
+		goto fault;
+
+	/* Copy the linear header area. */
+	if ((copy = start - offset) > 0) {
+		if (copy > len)
+			copy = len;
+
+		skb->dma_cookie = *dma_cookie =
+			dma_async_memcpy_buf_to_buf(chan, to,
+						    skb->data + offset,
+						    copy);
+		if (skb->dma_cookie < 0)
+			goto fault;
+
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to += copy;
+	}
+
+	/* Copy the paged fragments. */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			u8 *vaddr;
+
+			if (copy > len)
+				copy = len;
+
+			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+
+			skb->dma_cookie = *dma_cookie =
+				dma_async_memcpy_buf_to_buf(chan, to,
+					vaddr +
+					skb_shinfo(skb)->frags[i].page_offset +
+					offset - start, copy);
+			/*
+			 * Unmap before any error exit: the original code
+			 * jumped to fault while the frag was still kmapped,
+			 * leaking an atomic kmap slot.
+			 */
+			kunmap_skb_frag(vaddr);
+			if (skb->dma_cookie < 0)
+				goto fault;
+
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to += copy;
+		}
+		start = end;
+	}
+
+	if (skb->dma_cookie > 0)
+		*dma_cookie = skb->dma_cookie;
+
+	/* Recurse into the frag list for nonlinear skbs. */
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			if (copy > len)
+				copy = len;
+			if (dma_skb_copy_bits(chan, frag_iter, offset - start,
+					      to, copy, dma_cookie))
+				goto fault;
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to += copy;
+		}
+		start = end;
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+EXPORT_SYMBOL_GPL(dma_skb_copy_bits);