This patch changes the architecture of the CONFIG_ options set in the
.config file. Previously, CONFIG_NET_DMA enabled I/OAT receive offload
for TCP. With this patch, CONFIG_NET_DMA enables the net/core/netdma
library, which provides two functions: dma_skb_copy_datagram_iovec,
used by TCP, and dma_skb_copy_bits, whose iSCSI user is added by the
following patch. Once CONFIG_NET_DMA is set, these functions are
available to any subsystem that needs them; the idea behind this change
is to have a core library for networking-related DMA operations.

CONFIG_TCP_DMA now enables DMA copies for the TCP subsystem.
CONFIG_ISCSI_DMA now enables DMA copies for the iSCSI data path.
CONFIG_TCP_DMA and CONFIG_ISCSI_DMA are mutually exclusive until
patches are submitted to enable CB multi-client support.
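
For illustration, a minimal sketch of how a CONFIG_ISCSI_DMA client
could consume the library. The wrapper iscsi_dma_copy_skb and its
synchronous completion wait are hypothetical and not part of this
patch; only the netdma/dmaengine calls shown are real:

/* Hypothetical consumer sketch -- not part of this patch. */
#include <linux/dmaengine.h>
#include <linux/skbuff.h>
#include <net/netdma.h>

static int iscsi_dma_copy_skb(struct sk_buff *skb, int offset,
			      void *to, int len)
{
	struct dma_chan *chan;
	dma_cookie_t cookie, done, used;
	int err;

	/* Grab the per-CPU softnet DMA channel; fall back to a CPU copy
	 * when no I/OAT channel is available.
	 */
	chan = get_softnet_dma();
	if (!chan)
		return skb_copy_bits(skb, offset, to, len);

	err = dma_skb_copy_bits(chan, skb, offset, to, len, &cookie);
	if (err < 0)
		goto out;

	/* Kick the engine and, for simplicity, wait synchronously for
	 * the last queued copy to complete.
	 */
	dma_async_memcpy_issue_pending(chan);
	while (dma_async_memcpy_complete(chan, cookie, &done, &used) ==
			DMA_IN_PROGRESS)
		cpu_relax();
out:
	dma_chan_put(chan);
	return err;
}
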
Signed-off-by: Supreeth Venkataraman <[EMAIL PROTECTED]>
---
drivers/dma/Kconfig | 13 +-
include/linux/tcp.h | 4 -
include/net/netdma.h | 3 +
include/net/sock.h | 2
include/net/tcp.h | 4 -
net/core/Makefile | 2
net/core/netdma.c | 243 ++++++++++++++++++++++++++++++++++++++++++++
net/core/sock.c | 4 -
net/ipv4/sysctl_net_ipv4.c | 4 -
net/ipv4/tcp.c | 19 ++-
net/ipv4/tcp_input.c | 10 +-
net/ipv4/tcp_ipv4.c | 4 -
net/ipv6/tcp_ipv6.c | 2
13 files changed, 280 insertions(+), 34 deletions(-)
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index c46b7c2..8f8b798 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -40,15 +40,14 @@ config DMA_ENGINE
bool
comment "DMA Clients"
- depends on DMA_ENGINE
+ depends on DMA_ENGINE
config NET_DMA
- bool "Network: TCP receive copy offload"
- depends on DMA_ENGINE && NET
+ bool "Network DMA offload support"
+ depends on DMA_ENGINE && NET
+ default y
help
- This enables the use of DMA engines in the network stack to
- offload receive copy-to-user operations, freeing CPU cycles.
- Since this is the main user of the DMA engine, it should be enabled;
- say Y here.
+ This enables the netdma library used to manipulate networking
+ internals using DMA copies.
endif
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index bac17c5..85edf61 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -273,13 +273,13 @@ struct tcp_sock {
struct iovec *iov;
int memory;
int len;
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
/* members for async copy */
struct dma_chan *dma_chan;
int wakeup;
struct dma_pinned_list *pinned_list;
dma_cookie_t dma_cookie;
-#endif
+#endif /* CONFIG_TCP_DMA */
} ucopy;
u32 snd_wl1; /* Sequence for window update */
diff --git a/include/net/netdma.h b/include/net/netdma.h
index f28c6e0..19a18f6 100644
--- a/include/net/netdma.h
+++ b/include/net/netdma.h
@@ -39,5 +39,8 @@ int dma_skb_copy_datagram_iovec(struct dma_chan* chan,
struct sk_buff *skb, int offset, struct iovec *to,
size_t len, struct dma_pinned_list *pinned_list);
+int dma_skb_copy_bits(struct dma_chan *chan, struct sk_buff *skb,
+ int offset, void *to, int len, dma_cookie_t *dma_cookie);
+
#endif /* CONFIG_NET_DMA */
#endif /* NETDMA_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 6e1542d..fd8edc4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1351,7 +1351,7 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_e
__skb_unlink(skb, &sk->sk_receive_queue);
__kfree_skb(skb);
}
-#endif
+#endif /* CONFIG_NET_DMA */
extern void sock_enable_timestamp(struct sock *sk);
extern int sock_get_timestamp(struct sock *, struct timeval __user *);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index cb5b033..396f85d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -865,12 +865,12 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp)
tp->ucopy.len = 0;
tp->ucopy.memory = 0;
skb_queue_head_init(&tp->ucopy.prequeue);
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
tp->ucopy.dma_chan = NULL;
tp->ucopy.wakeup = 0;
tp->ucopy.pinned_list = NULL;
tp->ucopy.dma_cookie = 0;
-#endif
+#endif /* CONFIG_TCP_DMA */
}
/* Packet is added to VJ-style prequeue for processing in process
diff --git a/net/core/Makefile b/net/core/Makefile
index b1332f6..7626cd8 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -14,5 +14,5 @@ obj-$(CONFIG_XFRM) += flow.o
obj-y += net-sysfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NETPOLL) += netpoll.o
-obj-$(CONFIG_NET_DMA) += user_dma.o
+obj-$(CONFIG_NET_DMA) += netdma.o
obj-$(CONFIG_FIB_RULES) += fib_rules.o
diff --git a/net/core/netdma.c b/net/core/netdma.c
new file mode 100644
index 0000000..4acad7a
--- /dev/null
+++ b/net/core/netdma.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ * Portions based on net/core/datagram.c and copyrighted by their authors.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This code allows the net stack to make use of a DMA engine for
+ * skb to iovec copies.
+ */
+#ifdef CONFIG_NET_DMA
+
+#include <linux/dmaengine.h>
+#include <linux/socket.h>
+#include <linux/rtnetlink.h> /* for BUG_TRAP */
+#include <net/tcp.h>
+#include <net/netdma.h>
+#include "kmap_skb.h"
+
+#define NET_DMA_DEFAULT_COPYBREAK 4096
+
+int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK;
+
+/**
+ * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.
+ * @skb - buffer to copy
+ * @offset - offset in the buffer to start copying from
+ * @iovec - io vector to copy to
+ * @len - amount of data to copy from buffer to iovec
+ * @pinned_list - locked iovec buffer data
+ *
+ * Note: the iovec is modified during the copy.
+ */
+int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
+ struct sk_buff *skb, int offset, struct iovec *to,
+ size_t len, struct dma_pinned_list *pinned_list)
+{
+ int start = skb_headlen(skb);
+ int i, copy = start - offset;
+ dma_cookie_t cookie = 0;
+
+ /* Copy header. */
+ if (copy > 0) {
+ if (copy > len)
+ copy = len;
+ cookie = dma_memcpy_to_iovec(chan, to, pinned_list,
+ skb->data + offset, copy);
+ if (cookie < 0)
+ goto fault;
+ len -= copy;
+ if (len == 0)
+ goto end;
+ offset += copy;
+ }
+
+ /* Copy paged appendix. Hmm... why does this look so complicated? */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + skb_shinfo(skb)->frags[i].size;
+ copy = end - offset;
+ if (copy > 0) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ struct page *page = frag->page;
+
+ if (copy > len)
+ copy = len;
+
+ cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list, page,
+ frag->page_offset + offset - start, copy);
+ if (cookie < 0)
+ goto fault;
+ len -= copy;
+ if (len == 0)
+ goto end;
+ offset += copy;
+ }
+ start = end;
+ }
+
+ if (skb_shinfo(skb)->frag_list) {
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+ for (; list; list = list->next) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + list->len;
+ copy = end - offset;
+ if (copy > 0) {
+ if (copy > len)
+ copy = len;
+ cookie = dma_skb_copy_datagram_iovec(chan, list,
+ offset - start, to, copy,
+ pinned_list);
+ if (cookie < 0)
+ goto fault;
+ len -= copy;
+ if (len == 0)
+ goto end;
+ offset += copy;
+ }
+ start = end;
+ }
+ }
+
+end:
+ if (!len) {
+ skb->dma_cookie = cookie;
+ return cookie;
+ }
+
+fault:
+ return -EFAULT;
+}
+
+/**
+ * dma_skb_copy_bits - Copy an skb to a linear destination buffer
+ * @chan - DMA channel to use for the copy
+ * @skb - buffer to copy
+ * @offset - offset in the buffer to start copying from
+ * @to - destination buffer to copy to
+ * @len - amount of data to copy from the skb to the destination
+ * @dma_cookie - cookie of the last queued DMA copy is returned here
+ * Note: the copies are performed asynchronously on the DMA channel.
+ */
+int dma_skb_copy_bits(struct dma_chan *chan, struct sk_buff *skb,
+ int offset, void *to, int len, dma_cookie_t *dma_cookie)
+{
+ int i, copy;
+ int start = skb_headlen(skb);
+
+ *dma_cookie = 0;
+
+ if (offset > (int)skb->len - len)
+ goto fault;
+
+ /* Copy header. */
+ if ((copy = start - offset) > 0) {
+ if (copy > len)
+ copy = len;
+
+ skb->dma_cookie = *dma_cookie =
+ dma_async_memcpy_buf_to_buf(chan, to, skb->data + offset, copy);
+
+ if (skb->dma_cookie < 0)
+ goto fault;
+
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ to += copy;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + skb_shinfo(skb)->frags[i].size;
+ if ((copy = end - offset) > 0) {
+ u8 *vaddr;
+
+ if (copy > len)
+ copy = len;
+
+ vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+
+ skb->dma_cookie = *dma_cookie =
+ dma_async_memcpy_buf_to_buf(chan, to, vaddr +
+ skb_shinfo(skb)->frags[i].page_offset +
+ offset - start, copy);
+
+ if (skb->dma_cookie < 0)
+ goto fault;
+
+ kunmap_skb_frag(vaddr);
+
+ if ((len -= copy) == 0)
+ {
+ return 0;
+ }
+ offset += copy;
+ to += copy;
+ }
+ start = end;
+ }
+
+ if (skb->dma_cookie > 0)
+ *dma_cookie = skb->dma_cookie;
+
+ if (skb_shinfo(skb)->frag_list) {
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+ for (; list; list = list->next) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + list->len;
+ if ((copy = end - offset) > 0) {
+ if (copy > len)
+ copy = len;
+ if (dma_skb_copy_bits(chan, list, offset - start,
+ to, copy, dma_cookie))
+ goto fault;
+ if ((len -= copy) == 0)
+ return 0;
+ offset += copy;
+ to += copy;
+ }
+ start = end;
+ }
+ }
+ if (!len)
+ return 0;
+
+fault:
+ return -EFAULT;
+}
+
+EXPORT_SYMBOL(dma_skb_copy_bits);
+
+#endif /* CONFIG_NET_DMA */
diff --git a/net/core/sock.c b/net/core/sock.c
index c519b43..905aaa7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -993,7 +993,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
skb_queue_head_init(&newsk->sk_async_wait_queue);
-#endif
+#endif /* CONFIG_NET_DMA */
rwlock_init(&newsk->sk_dst_lock);
rwlock_init(&newsk->sk_callback_lock);
@@ -1564,7 +1564,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
skb_queue_head_init(&sk->sk_error_queue);
#ifdef CONFIG_NET_DMA
skb_queue_head_init(&sk->sk_async_wait_queue);
-#endif
+#endif /* CONFIG_NET_DMA */
sk->sk_send_head = NULL;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 16e04b7..7add8e2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -811,7 +811,7 @@ ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec
},
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
{
.ctl_name = NET_TCP_DMA_COPYBREAK,
.procname = "tcp_dma_copybreak",
@@ -820,7 +820,7 @@ ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec
},
-#endif
+#endif /* CONFIG_TCP_DMA */
{
.ctl_name = NET_TCP_SLOW_START_AFTER_IDLE,
.procname = "tcp_slow_start_after_idle",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8e65182..8d67edb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1142,9 +1142,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
tp->ucopy.dma_chan = NULL;
preempt_disable();
+
skb = skb_peek_tail(&sk->sk_receive_queue);
{
int available = 0;
@@ -1162,7 +1163,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
preempt_enable_no_resched();
}
}
-#endif
+#endif /* CONFIG_TCP_DMA */
do {
u32 offset;
@@ -1304,9 +1305,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
} else
sk_wait_data(sk, &timeo);
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
tp->ucopy.wakeup = 0;
-#endif
+#endif /* CONFIG_TCP_DMA */
if (user_recv) {
int chunk;
@@ -1363,7 +1364,7 @@ do_prequeue:
}
if (!(flags & MSG_TRUNC)) {
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
tp->ucopy.dma_chan = get_softnet_dma();
@@ -1386,7 +1387,7 @@ do_prequeue:
copied_early = 1;
} else
-#endif
+#endif /* CONFIG_TCP_DMA */
{
err = skb_copy_datagram_iovec(skb, offset,
msg->msg_iov, used);
@@ -1450,7 +1451,7 @@ skip_copy:
tp->ucopy.len = 0;
}
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
if (tp->ucopy.dma_chan) {
dma_cookie_t done, used;
@@ -1477,7 +1478,7 @@ skip_copy:
dma_unpin_iovec_pages(tp->ucopy.pinned_list);
tp->ucopy.pinned_list = NULL;
}
-#endif
+#endif /* CONFIG_TCP_DMA */
/* According to UNIX98, msg_name/msg_namelen are ignored
* on connected socket. I was just happy when found this 8) --ANK
@@ -1751,7 +1752,7 @@ int tcp_disconnect(struct sock *sk, int flags)
__skb_queue_purge(&tp->out_of_order_queue);
#ifdef CONFIG_NET_DMA
__skb_queue_purge(&sk->sk_async_wait_queue);
-#endif
+#endif /* CONFIG_NET_DMA */
inet->dport = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b39f0d8..91da168 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4376,7 +4376,7 @@ static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *sk
__tcp_checksum_complete_user(sk, skb);
}
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -4418,7 +4418,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
out:
return copied_early;
}
-#endif /* CONFIG_NET_DMA */
+#endif /* CONFIG_TCP_DMA */
/*
* TCP receive function for the ESTABLISHED state.
@@ -4538,12 +4538,12 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if (tp->copied_seq == tp->rcv_nxt &&
len - tcp_header_len <= tp->ucopy.len) {
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
copied_early = 1;
eaten = 1;
}
-#endif
+#endif /* CONFIG_TCP_DMA */
if (tp->ucopy.task == current &&
sock_owned_by_user(sk) && !copied_early) {
__set_current_state(TASK_RUNNING);
@@ -4613,7 +4613,7 @@ no_ack:
if (copied_early)
__skb_queue_tail(&sk->sk_async_wait_queue, skb);
else
-#endif
+#endif /* CONFIG_NET_DMA */
if (eaten)
__kfree_skb(skb);
else
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 652c323..44d3755 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1682,14 +1682,14 @@ process:
bh_lock_sock_nested(sk);
ret = 0;
if (!sock_owned_by_user(sk)) {
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
struct tcp_sock *tp = tcp_sk(sk);
if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
tp->ucopy.dma_chan = get_softnet_dma();
if (tp->ucopy.dma_chan)
ret = tcp_v4_do_rcv(sk, skb);
else
-#endif
+#endif /* CONFIG_TCP_DMA */
{
if (!tcp_prequeue(sk, skb))
ret = tcp_v4_do_rcv(sk, skb);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 93980c3..ec331b5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1732,7 +1732,7 @@ process:
bh_lock_sock_nested(sk);
ret = 0;
if (!sock_owned_by_user(sk)) {
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
struct tcp_sock *tp = tcp_sk(sk);
if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
tp->ucopy.dma_chan = get_softnet_dma();