On 04/04/2008 03:54 PM, supreeth wrote:
throughput using Intel 1Gb ethernet cards (e1000) and a significant
increase
in throughput using the Intel 10Gb cards.


For writes I am getting 1000MB/s (netperf reports 1100), but for reads I am getting only 600 MB/s. On the write side we use sendpage which is zero copy. For reads though, we use memcpy. perf traces are showing we are in that memcpy a lot. So I thought these patches might help since Supreeth had also mentioned a significant increase in throughput.

The attached patch ports Supreeth's patches to Linus's tree.

For reads I just do something like

fio --filename=/dev/sdXYZ --direct=1 --rw=randread --bs=1m --size=10G --numjobs=4 --runtime=10 --group_reporting --name=file1

For writes I do:

fio --filename=/dev/sdXYZ --direct=1 --rw=randwrite --bs=1m --size=10G --numjobs=4 --runtime=10 --group_reporting --name=file1

The iscsi target disks are memory backed disks, so not actually going to real spinning disks :)

Also in the attached patch is a change to the r2t code which speeds up writes when the IO size requires R2Ts.

With Linus's tree I had to use the "noop" io scheduler, turn off iptables ("/etc/init.d/iptables stop" on Fedora/RHEL systems), turn off irqbalance, turn off cpuspeed, and then play around with the /sys/block/sdXYZ/queue/rq_affinity setting. Also it sometimes helped when the MaxRecvDataSegmentLength and MaxXmitDataSegmentLength were larger (128 - 256K) and then the /sys/block/sdXYZ/queue/max_sectors_kb matched them.

And make sure ioatdma is loaded and that /sys/class/dma/ has some dma channels.

If you have a fast system with ioatdma please try it out.

--
You received this message because you are subscribed to the Google Groups 
"open-iscsi" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to 
[email protected].
For more options, visit this group at 
http://groups.google.com/group/open-iscsi?hl=en.

diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c
index d2ad3d6..42b6861 100644
--- a/drivers/scsi/cxgbi/libcxgbi.c
+++ b/drivers/scsi/cxgbi/libcxgbi.c
@@ -2158,11 +2158,10 @@ int cxgbi_set_conn_param(struct iscsi_cls_conn *cls_conn,
 			enum iscsi_param param, char *buf, int buflen)
 {
 	struct iscsi_conn *conn = cls_conn->dd_data;
-	struct iscsi_session *session = conn->session;
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	struct cxgbi_conn *cconn = tcp_conn->dd_data;
 	struct cxgbi_sock *csk = cconn->cep->csk;
-	int value, err = 0;
+	int err = 0;
 
 	log_debug(1 << CXGBI_DBG_ISCSI,
 		"cls_conn 0x%p, param %d, buf(%d) %s.\n",
@@ -2183,16 +2182,6 @@ int cxgbi_set_conn_param(struct iscsi_cls_conn *cls_conn,
 							conn->hdrdgst_en,
 							conn->datadgst_en, 0);
 		break;
-	case ISCSI_PARAM_MAX_R2T:
-		sscanf(buf, "%d", &value);
-		if (value <= 0 || !is_power_of_2(value))
-			return -EINVAL;
-		if (session->max_r2t == value)
-			break;
-		iscsi_tcp_r2tpool_free(session);
-		err = iscsi_set_param(cls_conn, param, buf, buflen);
-		if (!err && iscsi_tcp_r2tpool_alloc(session))
-			return -ENOMEM;
 	case ISCSI_PARAM_MAX_RECV_DLENGTH:
 		err = iscsi_set_param(cls_conn, param, buf, buflen);
 		if (!err)
@@ -2325,7 +2314,6 @@ struct iscsi_cls_session *cxgbi_create_session(struct iscsi_endpoint *ep,
 	struct cxgbi_hba *chba;
 	struct Scsi_Host *shost;
 	struct iscsi_cls_session *cls_session;
-	struct iscsi_session *session;
 
 	if (!ep) {
 		pr_err("missing endpoint.\n");
@@ -2346,17 +2334,9 @@ struct iscsi_cls_session *cxgbi_create_session(struct iscsi_endpoint *ep,
 	if (!cls_session)
 		return NULL;
 
-	session = cls_session->dd_data;
-	if (iscsi_tcp_r2tpool_alloc(session))
-		goto remove_session;
-
 	log_debug(1 << CXGBI_DBG_ISCSI,
 		"ep 0x%p, cls sess 0x%p.\n", ep, cls_session);
 	return cls_session;
-
-remove_session:
-	iscsi_session_teardown(cls_session);
-	return NULL;
 }
 EXPORT_SYMBOL_GPL(cxgbi_create_session);
 
@@ -2365,7 +2345,6 @@ void cxgbi_destroy_session(struct iscsi_cls_session *cls_session)
 	log_debug(1 << CXGBI_DBG_ISCSI,
 		"cls sess 0x%p.\n", cls_session);
 
-	iscsi_tcp_r2tpool_free(cls_session->dd_data);
 	iscsi_session_teardown(cls_session);
 }
 EXPORT_SYMBOL_GPL(cxgbi_destroy_session);
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index fec47de..9edb244 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -121,12 +121,46 @@ static inline int iscsi_sw_sk_state_check(struct sock *sk)
 	return 0;
 }
 
+/* Wait for the last issued DMA copy, then purge sk_async_wait_queue. */
+static void iscsi_tcp_dma_cleanup(struct iscsi_tcp_conn *tcp_conn,
+				  struct sock *sk)
+{
+	struct sk_buff *skb;
+	dma_cookie_t done, used;
+
+	if (!tcp_conn->dma_chan || tcp_conn->dma_cookie <= 0)
+		return;
+
+	dma_async_memcpy_issue_pending(tcp_conn->dma_chan);
+
+	while (dma_async_memcpy_complete(tcp_conn->dma_chan,
+					 tcp_conn->dma_cookie, &done, &used) ==
+					 DMA_IN_PROGRESS) {
+		/*
+		 * Partial cleanup of sk_async_wait_queue up to done. Use
+		 * dma_async_is_complete() rather than a plain '<' so the
+		 * test stays correct when cookie values wrap.
+		 */
+		while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
+		       dma_async_is_complete(skb->dma_cookie, done,
+					     used) == DMA_SUCCESS) {
+			__skb_dequeue(&sk->sk_async_wait_queue);
+			kfree_skb(skb);
+		}
+	}
+
+	/* Safe to free early-copied skbs now */
+	__skb_queue_purge(&sk->sk_async_wait_queue);
+	tcp_conn->dma_cookie = 0;
+}
+
 static void iscsi_sw_tcp_data_ready(struct sock *sk, int flag)
 {
 	struct iscsi_conn *conn = sk->sk_user_data;
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	read_descriptor_t rd_desc;
 
+	iscsi_tcp_set_dma_chan(tcp_conn);
 	read_lock(&sk->sk_callback_lock);
 
 	/*
@@ -146,6 +180,8 @@ static void iscsi_sw_tcp_data_ready(struct sock *sk, int flag)
 	/* If we had to (atomically) map a highmem page,
 	 * unmap it now. */
 	iscsi_tcp_segment_unmap(&tcp_conn->in.segment);
+	/* Leak? If we stop the conn mid segment then could this leak */
+//	iscsi_tcp_dma_cleanup(tcp_conn, sk);
 }
 
 static void iscsi_sw_tcp_state_change(struct sock *sk)
@@ -698,6 +734,10 @@ iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session,
 	sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
 	sk->sk_allocation = GFP_ATOMIC;
 
+	iscsi_tcp_init_dma_sock(sk);
+
+	tcp_conn->sk = sk;
+
 	iscsi_sw_tcp_conn_set_callbacks(conn);
 	tcp_sw_conn->sendpage = tcp_sw_conn->sock->ops->sendpage;
 	/*
@@ -716,10 +756,8 @@ static int iscsi_sw_tcp_conn_set_param(struct iscsi_cls_conn *cls_conn,
 				       int buflen)
 {
 	struct iscsi_conn *conn = cls_conn->dd_data;
-	struct iscsi_session *session = conn->session;
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
-	int value;
 
 	switch(param) {
 	case ISCSI_PARAM_HDRDGST_EN:
@@ -730,17 +768,6 @@ static int iscsi_sw_tcp_conn_set_param(struct iscsi_cls_conn *cls_conn,
 		tcp_sw_conn->sendpage = conn->datadgst_en ?
 			sock_no_sendpage : tcp_sw_conn->sock->ops->sendpage;
 		break;
-	case ISCSI_PARAM_MAX_R2T:
-		sscanf(buf, "%d", &value);
-		if (value <= 0 || !is_power_of_2(value))
-			return -EINVAL;
-		if (session->max_r2t == value)
-			break;
-		iscsi_tcp_r2tpool_free(session);
-		iscsi_set_param(cls_conn, param, buf, buflen);
-		if (iscsi_tcp_r2tpool_alloc(session))
-			return -ENOMEM;
-		break;
 	default:
 		return iscsi_set_param(cls_conn, param, buf, buflen);
 	}
@@ -827,12 +854,8 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
 	session = cls_session->dd_data;
 
 	shost->can_queue = session->scsi_cmds_max;
-	if (iscsi_tcp_r2tpool_alloc(session))
-		goto remove_session;
 	return cls_session;
 
-remove_session:
-	iscsi_session_teardown(cls_session);
 remove_host:
 	iscsi_host_remove(shost);
 free_host:
@@ -844,7 +867,6 @@ static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session)
 {
 	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
 
-	iscsi_tcp_r2tpool_free(cls_session->dd_data);
 	iscsi_session_teardown(cls_session);
 
 	iscsi_host_remove(shost);
@@ -959,11 +981,13 @@ static int __init iscsi_sw_tcp_init(void)
 	if (!iscsi_sw_tcp_scsi_transport)
 		return -ENODEV;
 
+	net_dmaengine_get();
 	return 0;
 }
 
 static void __exit iscsi_sw_tcp_exit(void)
 {
+	net_dmaengine_put();
 	iscsi_unregister_transport(&iscsi_sw_tcp_transport);
 }
 
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index da8b615..7b4bdd4 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -2780,6 +2780,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
 		task->itt = cmd_i;
 		task->state = ISCSI_TASK_FREE;
 		INIT_LIST_HEAD(&task->running);
+		spin_lock_init(&task->lock);
 	}
 
 	if (!try_module_get(iscsit->owner))
@@ -2884,6 +2885,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 					 get_order(ISCSI_DEF_MAX_RECV_SEG_LEN));
 	if (!data)
 		goto login_task_data_alloc_fail;
+
 	conn->login_task->data = conn->data = data;
 
 	init_timer(&conn->tmf_timer);
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index 8eeb39f..0460ef2 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -37,6 +37,7 @@
 #include <linux/kfifo.h>
 #include <linux/scatterlist.h>
 #include <net/tcp.h>
+#include <net/netdma.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
@@ -439,25 +440,14 @@ iscsi_tcp_data_recv_prep(struct iscsi_tcp_conn *tcp_conn)
 void iscsi_tcp_cleanup_task(struct iscsi_task *task)
 {
 	struct iscsi_tcp_task *tcp_task = task->dd_data;
-	struct iscsi_r2t_info *r2t;
 
 	/* nothing to do for mgmt */
 	if (!task->sc)
 		return;
 
-	/* flush task's r2t queues */
-	while (kfifo_out(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
-		kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t,
-			    sizeof(void*));
-		ISCSI_DBG_TCP(task->conn, "pending r2t dropped\n");
-	}
-
-	r2t = tcp_task->r2t;
-	if (r2t != NULL) {
-		kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t,
-			    sizeof(void*));
-		tcp_task->r2t = NULL;
-	}
+	/* reset task's r2t */
+	tcp_task->r2t.data_length = 0;
+	tcp_task->r2t.sent = 0;
 }
 EXPORT_SYMBOL_GPL(iscsi_tcp_cleanup_task);
 
@@ -515,9 +505,8 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 	struct iscsi_tcp_task *tcp_task = task->dd_data;
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
-	struct iscsi_r2t_info *r2t;
+	struct iscsi_r2t_info *r2t = &tcp_task->r2t;
 	int r2tsn = be32_to_cpu(rhdr->r2tsn);
-	int rc;
 
 	if (tcp_conn->in.datalen) {
 		iscsi_conn_printk(KERN_ERR, conn,
@@ -542,21 +531,12 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 		return 0;
 	}
 
-	rc = kfifo_out(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
-	if (!rc) {
-		iscsi_conn_printk(KERN_ERR, conn, "Could not allocate R2T. "
-				  "Target has sent more R2Ts than it "
-				  "negotiated for or driver has has leaked.\n");
-		return ISCSI_ERR_PROTO;
-	}
-
+	memset(r2t, 0, sizeof(*r2t));
 	r2t->exp_statsn = rhdr->statsn;
 	r2t->data_length = be32_to_cpu(rhdr->data_length);
 	if (r2t->data_length == 0) {
 		iscsi_conn_printk(KERN_ERR, conn,
 				  "invalid R2T with zero data len\n");
-		kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t,
-			    sizeof(void*));
 		return ISCSI_ERR_DATALEN;
 	}
 
@@ -571,8 +551,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 				  "invalid R2T with data len %u at offset %u "
 				  "and total length %d\n", r2t->data_length,
 				  r2t->data_offset, scsi_out(task->sc)->length);
-		kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t,
-			    sizeof(void*));
+		r2t->data_length = 0;
 		return ISCSI_ERR_DATALEN;
 	}
 
@@ -581,7 +560,6 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 	r2t->sent = 0;
 
 	tcp_task->exp_datasn = r2tsn + 1;
-	kfifo_in(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
 	conn->r2t_pdus_cnt++;
 
 	iscsi_requeue_task(task);
@@ -709,18 +687,18 @@ iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
 		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
 		break;
 	case ISCSI_OP_R2T:
-		spin_lock(&conn->session->lock);
 		task = iscsi_itt_to_ctask(conn, hdr->itt);
 		if (!task)
 			rc = ISCSI_ERR_BAD_ITT;
 		else if (ahslen)
 			rc = ISCSI_ERR_AHSLEN;
 		else if (task->sc->sc_data_direction == DMA_TO_DEVICE) {
+			spin_lock(&task->lock);
 			task->last_xfer = jiffies;
 			rc = iscsi_tcp_r2t_rsp(conn, task);
+			spin_unlock(&task->lock);
 		} else
 			rc = ISCSI_ERR_PROTO;
-		spin_unlock(&conn->session->lock);
 		break;
 	case ISCSI_OP_LOGIN_RSP:
 	case ISCSI_OP_TEXT_RSP:
@@ -846,6 +824,123 @@ inline int iscsi_tcp_recv_segment_is_hdr(struct iscsi_tcp_conn *tcp_conn)
 }
 EXPORT_SYMBOL_GPL(iscsi_tcp_recv_segment_is_hdr);
 
+static unsigned int iscsi_tcp_copy_skb(struct iscsi_tcp_conn *tcp_conn,
+				       struct iscsi_segment *segment,
+				       struct sk_buff *skb, unsigned int offset,
+				       int *status)
+{
+	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
+	unsigned int nbytes, consumed = 0;
+	struct skb_seq_state state;
+	const u8 *data;
+	int rc;
+
+	/* CPU-copy path: walk the skb from @offset and feed the segment. */
+	skb_prepare_seq_read(skb, offset, skb->len, &state);
+	for (;;) {
+		nbytes = skb_seq_read(consumed, &data, &state);
+		if (!nbytes) {
+			/* skb exhausted before the segment was filled */
+			ISCSI_DBG_TCP(conn, "no more data avail. Consumed %d\n",
+				      consumed);
+			*status = ISCSI_TCP_SKB_DONE;
+			break;
+		}
+		BUG_ON(segment->copied >= segment->size);
+
+		ISCSI_DBG_TCP(conn, "skb %p ptr=%p avail=%u\n", skb, data,
+			      nbytes);
+		rc = iscsi_tcp_segment_recv(tcp_conn, segment, data, nbytes);
+		BUG_ON(!rc);
+		consumed += rc;
+
+		if (segment->total_copied >= segment->total_size) {
+			*status = ISCSI_TCP_SEGMENT_DONE;
+			break;
+		}
+	}
+
+	skb_abort_seq_read(&state);
+	return consumed;
+}
+
+/* Wait for the last issued DMA copy, then purge sk_async_wait_queue. */
+static void iscsi_tcp_dma_cleanup(struct iscsi_tcp_conn *tcp_conn,
+				  struct sock *sk)
+{
+	struct sk_buff *skb;
+	dma_cookie_t done, used;
+
+	if (!tcp_conn->dma_chan || tcp_conn->dma_cookie <= 0)
+		return;
+
+	dma_async_memcpy_issue_pending(tcp_conn->dma_chan);
+
+	while (dma_async_memcpy_complete(tcp_conn->dma_chan,
+					 tcp_conn->dma_cookie, &done, &used) ==
+					 DMA_IN_PROGRESS) {
+		/*
+		 * Partial cleanup of sk_async_wait_queue up to done. Use
+		 * dma_async_is_complete() rather than a plain '<' so the
+		 * test stays correct when cookie values wrap.
+		 */
+		while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
+		       dma_async_is_complete(skb->dma_cookie, done,
+					     used) == DMA_SUCCESS) {
+			__skb_dequeue(&sk->sk_async_wait_queue);
+			kfree_skb(skb);
+		}
+	}
+
+	/* Safe to free early-copied skbs now */
+	__skb_queue_purge(&sk->sk_async_wait_queue);
+	tcp_conn->dma_cookie = 0;
+}
+
+static unsigned int iscsi_tcp_dma_skb(struct iscsi_tcp_conn *tcp_conn,
+				      struct iscsi_segment *segment,
+				      struct sk_buff *skb, unsigned int offset,
+				      int *status)
+{
+	unsigned int copy = 0, copied = 0;
+	dma_cookie_t cookie;
+
+	if (skb->len == offset) {
+		*status = ISCSI_TCP_SKB_DONE;
+		return 0;
+	}
+	/* DMA path: issue copies chunk by chunk; copy is 0 on first pass. */
+	while (!iscsi_tcp_segment_done(tcp_conn, segment, 1, copy)) {
+		copy = min(skb->len - (offset + copied),
+			   segment->size - segment->copied);
+		if (!copy)
+			break;
+
+		ISCSI_DBG_TCP(tcp_conn->iscsi_conn, "dma: copying %u, skb len "
+			      "%u, offset %u, copied %u, segment sz %u "
+			      "segment copied %u data %p\n", copy, skb->len,
+			      offset, copied, segment->size, segment->copied,
+				segment->data);
+		cookie = 0;
+		WARN_ON_ONCE(dma_skb_copy_bits(tcp_conn->dma_chan, skb,
+					       offset + copied,
+					       segment->data + segment->copied,
+					       copy, &cookie));
+		/* Keep the newest valid cookie for iscsi_tcp_dma_cleanup(). */
+		if (cookie > 0)
+			tcp_conn->dma_cookie = cookie;
+		/* NOTE(review): counted even when the copy above failed. */
+		copied += copy;
+	}
+	/* Runs the cleanup helper before @status is reported. */
+	iscsi_tcp_dma_cleanup(tcp_conn, tcp_conn->sk);
+	if (segment->total_copied >= segment->total_size)
+		*status = ISCSI_TCP_SEGMENT_DONE;
+	else
+		*status = ISCSI_TCP_SKB_DONE;
+	return copied;
+}
+
 /**
  * iscsi_tcp_recv_skb - Process skb
  * @conn: iscsi connection
@@ -861,7 +956,6 @@ int iscsi_tcp_recv_skb(struct iscsi_conn *conn, struct sk_buff *skb,
 {
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	struct iscsi_segment *segment = &tcp_conn->in.segment;
-	struct skb_seq_state seq;
 	unsigned int consumed = 0;
 	int rc = 0;
 
@@ -879,39 +973,18 @@ int iscsi_tcp_recv_skb(struct iscsi_conn *conn, struct sk_buff *skb,
 		return 0;
 	}
 
-	if (offloaded) {
-		segment->total_copied = segment->total_size;
-		goto segment_done;
-	}
-
-	skb_prepare_seq_read(skb, offset, skb->len, &seq);
-	while (1) {
-		unsigned int avail;
-		const u8 *ptr;
-
-		avail = skb_seq_read(consumed, &ptr, &seq);
-		if (avail == 0) {
-			ISCSI_DBG_TCP(conn, "no more data avail. Consumed %d\n",
-				      consumed);
-			*status = ISCSI_TCP_SKB_DONE;
-			skb_abort_seq_read(&seq);
+	if (!offloaded) {
+		if (tcp_conn->dma_chan)
+			consumed = iscsi_tcp_dma_skb(tcp_conn, segment, skb,
+						     offset, status);
+		else
+			consumed = iscsi_tcp_copy_skb(tcp_conn, segment, skb,
+						      offset, status);
+		if (*status == ISCSI_TCP_SKB_DONE)
 			goto skb_done;
-		}
-		BUG_ON(segment->copied >= segment->size);
-
-		ISCSI_DBG_TCP(conn, "skb %p ptr=%p avail=%u\n", skb, ptr,
-			      avail);
-		rc = iscsi_tcp_segment_recv(tcp_conn, segment, ptr, avail);
-		BUG_ON(rc == 0);
-		consumed += rc;
-
-		if (segment->total_copied >= segment->total_size) {
-			skb_abort_seq_read(&seq);
-			goto segment_done;
-		}
 	}
 
-segment_done:
+	segment->total_copied = segment->total_size;
 	*status = ISCSI_TCP_SEGMENT_DONE;
 	ISCSI_DBG_TCP(conn, "segment done\n");
 	rc = segment->done(tcp_conn, segment);
@@ -952,7 +1025,6 @@ int iscsi_tcp_task_init(struct iscsi_task *task)
 		return conn->session->tt->init_pdu(task, 0, task->data_count);
 	}
 
-	BUG_ON(kfifo_len(&tcp_task->r2tqueue));
 	tcp_task->exp_datasn = 0;
 
 	/* Prepare PDU, optionally w/ immediate data */
@@ -969,36 +1041,20 @@ EXPORT_SYMBOL_GPL(iscsi_tcp_task_init);
 
 static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
 {
-	struct iscsi_session *session = task->conn->session;
 	struct iscsi_tcp_task *tcp_task = task->dd_data;
 	struct iscsi_r2t_info *r2t = NULL;
 
 	if (iscsi_task_has_unsol_data(task))
 		r2t = &task->unsol_r2t;
 	else {
-		spin_lock_bh(&session->lock);
-		if (tcp_task->r2t) {
-			r2t = tcp_task->r2t;
-			/* Continue with this R2T? */
-			if (r2t->data_length <= r2t->sent) {
-				ISCSI_DBG_TCP(task->conn,
-					      "  done with r2t %p\n", r2t);
-				kfifo_in(&tcp_task->r2tpool.queue,
-					    (void *)&tcp_task->r2t,
-					    sizeof(void *));
-				tcp_task->r2t = r2t = NULL;
-			}
-		}
-
-		if (r2t == NULL) {
-			if (kfifo_out(&tcp_task->r2tqueue,
-			    (void *)&tcp_task->r2t, sizeof(void *)) !=
-			    sizeof(void *))
-				r2t = NULL;
-			else
-				r2t = tcp_task->r2t;
-		}
-		spin_unlock_bh(&session->lock);
+		spin_lock_bh(&task->lock);
+		r2t = &tcp_task->r2t;	/* was NULL; the print below needs it */
+		if (r2t->data_length <= r2t->sent) {	/* nothing to send */
+			ISCSI_DBG_TCP(task->conn, "No write data %u %u\n",
+				      r2t->data_length, r2t->sent);
+			r2t = NULL;
+		}
+		spin_unlock_bh(&task->lock);
 	}
 
 	return r2t;
@@ -1059,7 +1115,9 @@ flush:
 		return rc;
 	}
 
+	spin_lock_bh(&task->lock);
 	r2t->sent += r2t->data_count;
+	spin_unlock_bh(&task->lock);
 	goto flush;
 }
 EXPORT_SYMBOL_GPL(iscsi_tcp_task_xmit);
@@ -1105,67 +1163,6 @@ void iscsi_tcp_conn_teardown(struct iscsi_cls_conn *cls_conn)
 }
 EXPORT_SYMBOL_GPL(iscsi_tcp_conn_teardown);
 
-int iscsi_tcp_r2tpool_alloc(struct iscsi_session *session)
-{
-	int i;
-	int cmd_i;
-
-	/*
-	 * initialize per-task: R2T pool and xmit queue
-	 */
-	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
-	        struct iscsi_task *task = session->cmds[cmd_i];
-		struct iscsi_tcp_task *tcp_task = task->dd_data;
-
-		/*
-		 * pre-allocated x2 as much r2ts to handle race when
-		 * target acks DataOut faster than we data_xmit() queues
-		 * could replenish r2tqueue.
-		 */
-
-		/* R2T pool */
-		if (iscsi_pool_init(&tcp_task->r2tpool,
-				    session->max_r2t * 2, NULL,
-				    sizeof(struct iscsi_r2t_info))) {
-			goto r2t_alloc_fail;
-		}
-
-		/* R2T xmit queue */
-		if (kfifo_alloc(&tcp_task->r2tqueue,
-		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL)) {
-			iscsi_pool_free(&tcp_task->r2tpool);
-			goto r2t_alloc_fail;
-		}
-	}
-
-	return 0;
-
-r2t_alloc_fail:
-	for (i = 0; i < cmd_i; i++) {
-		struct iscsi_task *task = session->cmds[i];
-		struct iscsi_tcp_task *tcp_task = task->dd_data;
-
-		kfifo_free(&tcp_task->r2tqueue);
-		iscsi_pool_free(&tcp_task->r2tpool);
-	}
-	return -ENOMEM;
-}
-EXPORT_SYMBOL_GPL(iscsi_tcp_r2tpool_alloc);
-
-void iscsi_tcp_r2tpool_free(struct iscsi_session *session)
-{
-	int i;
-
-	for (i = 0; i < session->cmds_max; i++) {
-		struct iscsi_task *task = session->cmds[i];
-		struct iscsi_tcp_task *tcp_task = task->dd_data;
-
-		kfifo_free(&tcp_task->r2tqueue);
-		iscsi_pool_free(&tcp_task->r2tpool);
-	}
-}
-EXPORT_SYMBOL_GPL(iscsi_tcp_r2tpool_free);
-
 void iscsi_tcp_conn_get_stats(struct iscsi_cls_conn *cls_conn,
 			      struct iscsi_stats *stats)
 {
diff --git a/include/net/netdma.h b/include/net/netdma.h
index 8ba8ce2..3e275c4 100644
--- a/include/net/netdma.h
+++ b/include/net/netdma.h
@@ -27,6 +27,8 @@
 int dma_skb_copy_datagram_iovec(struct dma_chan* chan,
 		struct sk_buff *skb, int offset, struct iovec *to,
 		size_t len, struct dma_pinned_list *pinned_list);
+int dma_skb_copy_bits(struct dma_chan *chan, struct sk_buff *skb,
+		      int offset, void *to, int len, dma_cookie_t *dma_cookie);
 
 #endif /* CONFIG_NET_DMA */
 #endif /* NETDMA_H */
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 748382b..59ea47d 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -121,6 +121,7 @@ struct iscsi_task {
 
 	unsigned		imm_count;	/* imm-data (bytes)   */
 	/* offset in unsolicited stream (bytes); */
+	spinlock_t		lock;		/* protects r2t state, */
 	struct iscsi_r2t_info	unsol_r2t;
 	char			*data;		/* mgmt payload */
 	unsigned		data_count;
@@ -297,8 +298,7 @@ struct iscsi_session {
 						 * sequence numbers,       *
 						 * session resources:      *
 						 * - cmdpool,		   *
-						 * - mgmtpool,		   *
-						 * - r2tpool		   */
+						 * - mgmtpool,		   */
 	int			state;		/* session state           */
 	int			age;		/* counts session re-opens */
 
diff --git a/include/scsi/libiscsi_tcp.h b/include/scsi/libiscsi_tcp.h
index 741ae7e..fa1537f 100644
--- a/include/scsi/libiscsi_tcp.h
+++ b/include/scsi/libiscsi_tcp.h
@@ -21,6 +21,7 @@
 #ifndef LIBISCSI_TCP_H
 #define LIBISCSI_TCP_H
 
+#include <linux/dmaengine.h>
 #include <scsi/libiscsi.h>
 
 struct iscsi_tcp_conn;
@@ -64,6 +65,8 @@ struct iscsi_tcp_recv {
 };
 
 struct iscsi_tcp_conn {
+	struct sock		*sk;	/* tmp hack for tcp dma */
+
 	struct iscsi_conn	*iscsi_conn;
 	void			*dd_data;
 	int			stop_stage;	/* conn_stop() flag: *
@@ -73,14 +76,16 @@ struct iscsi_tcp_conn {
 	struct iscsi_tcp_recv	in;		/* TCP receive context */
 	/* CRC32C (Rx) LLD should set this is they do not offload */
 	struct hash_desc	*rx_hash;
+
+	/* optional dma channel fields */
+	struct dma_chan		*dma_chan;
+	dma_cookie_t		dma_cookie;	/* last DMA cookie returned */
 };
 
 struct iscsi_tcp_task {
 	uint32_t		exp_datasn;	/* expected target's R2TSN/DataSN */
 	int			data_offset;
-	struct iscsi_r2t_info	*r2t;		/* in progress solict R2T */
-	struct iscsi_pool	r2tpool;
-	struct kfifo		r2tqueue;
+	struct iscsi_r2t_info	r2t;		/* in progress solict R2T */
 	void			*dd_data;
 };
 
@@ -125,9 +130,33 @@ iscsi_tcp_conn_setup(struct iscsi_cls_session *cls_session, int dd_data_size,
 extern void iscsi_tcp_conn_teardown(struct iscsi_cls_conn *cls_conn);
 
 /* misc helpers */
-extern int iscsi_tcp_r2tpool_alloc(struct iscsi_session *session);
-extern void iscsi_tcp_r2tpool_free(struct iscsi_session *session);
-
 extern void iscsi_tcp_conn_get_stats(struct iscsi_cls_conn *cls_conn,
 				     struct iscsi_stats *stats);
+
+/* DMA helpers: defined in a header, so they must be static inline. */
+#ifdef CONFIG_NET_DMA
+
+/* Initialise the socket's sk_async_wait_queue. */
+static inline void iscsi_tcp_init_dma_sock(struct sock *sk)
+{
+	skb_queue_head_init(&sk->sk_async_wait_queue);
+}
+
+/* Store the result of dma_find_channel(DMA_MEMCPY) in tcp_conn->dma_chan. */
+static inline void iscsi_tcp_set_dma_chan(struct iscsi_tcp_conn *tcp_conn)
+{
+	tcp_conn->dma_chan = dma_find_channel(DMA_MEMCPY);
+}
+
+#else /* !CONFIG_NET_DMA: no-op stubs */
+
+static inline void iscsi_tcp_init_dma_sock(struct sock *sk)
+{
+}
+
+static inline void iscsi_tcp_set_dma_chan(struct iscsi_tcp_conn *tcp_conn)
+{
+}
+#endif /* CONFIG_NET_DMA */
+
 #endif /* LIBISCSI_TCP_H */
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
index 25d717e..4964e73 100644
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -29,6 +29,7 @@
 #include <linux/socket.h>
 #include <net/tcp.h>
 #include <net/netdma.h>
+#include "kmap_skb.h"
 
 #define NET_DMA_DEFAULT_COPYBREAK 4096
 
@@ -128,3 +129,105 @@ end:
 fault:
 	return -EFAULT;
 }
+
+/**
+ * dma_skb_copy_bits - Copy a skb to a buffer
+ * @chan: dma channel
+ * @skb: buffer to copy
+ * @offset: offset in the buffer to start copying from
+ * @to: buffer to copy to
+ * @len: number of bytes to copy from @skb to @to
+ * @dma_cookie: receives the cookie of the most recent copy request
+ * Returns 0 when all @len bytes were queued, -EFAULT otherwise.
+ */
+int dma_skb_copy_bits(struct dma_chan *chan, struct sk_buff *skb,
+		      int offset, void *to, int len, dma_cookie_t *dma_cookie)
+{
+	int start = skb_headlen(skb);
+	struct sk_buff *frag_iter;
+	int i, copy;
+
+	*dma_cookie = 0;
+
+	if (offset > (int)skb->len - len)
+		goto fault;
+
+	/* Copy header. */
+	if ((copy = start - offset) > 0) {
+		if (copy > len)
+			copy = len;
+
+		skb->dma_cookie = *dma_cookie =
+				dma_async_memcpy_buf_to_buf(chan, to,
+							    skb->data + offset,
+							    copy);
+		if (skb->dma_cookie < 0)
+			goto fault;
+
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to     += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			u8 *vaddr;
+
+			if (copy > len)
+				copy = len;
+
+			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+
+			skb->dma_cookie = *dma_cookie =
+				dma_async_memcpy_buf_to_buf(chan, to,
+							vaddr +skb_shinfo(skb)->
+							frags[i].page_offset +
+							offset - start, copy);
+			/* unmap before bailing out so the kmap is not leaked */
+			kunmap_skb_frag(vaddr);
+			if (skb->dma_cookie < 0)
+				goto fault;
+
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to     += copy;
+		}
+		start = end;
+	}
+
+	if (skb->dma_cookie > 0)
+		*dma_cookie = skb->dma_cookie;
+
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			if (copy > len)
+				copy = len;
+			if (dma_skb_copy_bits(chan, frag_iter, offset - start,
+					      to, copy, dma_cookie))
+				goto fault;
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to     += copy;
+		}
+		start = end;
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+EXPORT_SYMBOL_GPL(dma_skb_copy_bits);

Reply via email to