This patch changes the architecture of the CONFIG_ variables set in the .config file. Previously, CONFIG_NET_DMA enabled I/OAT for TCP. With this patch, CONFIG_NET_DMA enables the net/core/netdma library, which has two functions: dma_skb_copy_datagram_iovec is used by TCP, and dma_skb_copy_bits, which is used by the following patch, is used by iSCSI. Once CONFIG_NET_DMA is set, these functions are available to anyone who needs them. The idea behind this change is to provide a core library for networking-related DMA operations.
CONFIG_TCP_DMA now enables DMA copies for the TCP subsystem. CONFIG_ISCSI_DMA now enables DMA copies for the iSCSI data path. CONFIG_TCP_DMA and CONFIG_ISCSI_DMA are mutually exclusive until patches are submitted to enable CB multi-client support.

Signed-off-by: Supreeth Venkataraman <[EMAIL PROTECTED]>
---
 drivers/dma/Kconfig        |   13 +-
 include/linux/tcp.h        |    4 -
 include/net/netdma.h       |    6 +
 include/net/sock.h         |    2 
 include/net/tcp.h          |    4 -
 net/core/Makefile          |    2 
 net/core/netdma.c          |  243 ++++++++++++++++++++++++++++++++++++++++
 net/core/sock.c            |    4 -
 net/ipv4/sysctl_net_ipv4.c |    4 -
 net/ipv4/tcp.c             |   19 ++-
 net/ipv4/tcp_input.c       |   10 +-
 net/ipv4/tcp_ipv4.c        |    4 -
 net/ipv6/tcp_ipv6.c        |    2 
 13 files changed, 283 insertions(+), 34 deletions(-)

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index c46b7c2..8f8b798 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -40,15 +40,14 @@ config DMA_ENGINE
 	bool
 
 comment "DMA Clients"
-	depends on DMA_ENGINE
+	depends on DMA_ENGINE
 
 config NET_DMA
-	bool "Network: TCP receive copy offload"
-	depends on DMA_ENGINE && NET
+	bool "Network DMA offload support"
+	depends on DMA_ENGINE && NET
+	default y
 	help
-	  This enables the use of DMA engines in the network stack to
-	  offload receive copy-to-user operations, freeing CPU cycles.
-	  Since this is the main user of the DMA engine, it should be enabled;
-	  say Y here.
+	  This enables the netdma library used to manipulate networking
+	  internals using DMA copies.
 
 endif
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index bac17c5..85edf61 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -273,13 +273,13 @@ struct tcp_sock {
 		struct iovec		*iov;
 		int			memory;
 		int			len;
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
 		/* members for async copy */
 		struct dma_chan		*dma_chan;
 		int			wakeup;
 		struct dma_pinned_list	*pinned_list;
 		dma_cookie_t		dma_cookie;
-#endif
+#endif /* CONFIG_TCP_DMA */
 	} ucopy;
 
 	u32	snd_wl1;	/* Sequence for window update */
diff --git a/include/net/netdma.h b/include/net/netdma.h
index f28c6e0..19a18f6 100644
--- a/include/net/netdma.h
+++ b/include/net/netdma.h
@@ -39,5 +39,11 @@ int dma_skb_copy_datagram_iovec(struct dma_chan* chan,
 		struct sk_buff *skb, int offset, struct iovec *to,
 		size_t len, struct dma_pinned_list *pinned_list);
 
+int dma_skb_copy_bits(struct dma_chan *chan, struct sk_buff *skb,
+		int offset, void *to, int len, dma_cookie_t *dma_cookie);
+
+int dma_skb_copy_bits(struct dma_chan *chan, struct sk_buff *skb,
+		int offset, void *to, int len, dma_cookie_t *dma_cookie);
+
 #endif /* CONFIG_NET_DMA */
 #endif /* NETDMA_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 6e1542d..fd8edc4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1351,7 +1351,7 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_early)
 		__skb_unlink(skb, &sk->sk_receive_queue);
 		__kfree_skb(skb);
 	}
-#endif
+#endif /* CONFIG_NET_DMA */
 
 extern void sock_enable_timestamp(struct sock *sk);
 extern int sock_get_timestamp(struct sock *, struct timeval __user *);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index cb5b033..396f85d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -865,12 +865,12 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp)
 	tp->ucopy.len = 0;
 	tp->ucopy.memory = 0;
 	skb_queue_head_init(&tp->ucopy.prequeue);
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
 	tp->ucopy.dma_chan = NULL;
 	tp->ucopy.wakeup = 0;
 	tp->ucopy.pinned_list = NULL;
 	tp->ucopy.dma_cookie = 0;
-#endif
+#endif /* CONFIG_TCP_DMA */
 }
 
 /* Packet is added to VJ-style prequeue for processing in process
diff --git a/net/core/Makefile b/net/core/Makefile
index b1332f6..7626cd8 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -14,5 +14,5 @@ obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
-obj-$(CONFIG_NET_DMA) += user_dma.o
+obj-$(CONFIG_NET_DMA) += netdma.o
 obj-$(CONFIG_FIB_RULES) += fib_rules.o
diff --git a/net/core/netdma.c b/net/core/netdma.c
new file mode 100644
index 0000000..4acad7a
--- /dev/null
+++ b/net/core/netdma.c
@@ -0,0 +1,243 @@
+/*
+ *	Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ *	Portions based on net/core/datagram.c and copyrighted by their authors.
+ *
+ *	This program is free software; you can redistribute it and/or modify it
+ *	under the terms of the GNU General Public License as published by the Free
+ *	Software Foundation; either version 2 of the License, or (at your option)
+ *	any later version.
+ *
+ *	This program is distributed in the hope that it will be useful, but WITHOUT
+ *	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *	FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ *	more details.
+ *
+ *	You should have received a copy of the GNU General Public License along with
+ *	this program; if not, write to the Free Software Foundation, Inc., 59
+ *	Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *	The full GNU General Public License is included in this distribution in the
+ *	file called COPYING.
+ */
+
+/*
+ *	This code allows the net stack to make use of a DMA engine for
+ *	skb to iovec copies.
+ */
+#ifdef CONFIG_NET_DMA
+
+#include <linux/dmaengine.h>
+#include <linux/socket.h>
+#include <linux/rtnetlink.h> /* for BUG_TRAP */
+#include <net/tcp.h>
+#include <net/netdma.h>
+#include "kmap_skb.h"
+
+#define NET_DMA_DEFAULT_COPYBREAK 4096
+
+int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK;
+
+/**
+ *	dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.
+ *	@skb - buffer to copy
+ *	@offset - offset in the buffer to start copying from
+ *	@iovec - io vector to copy to
+ *	@len - amount of data to copy from buffer to iovec
+ *	@pinned_list - locked iovec buffer data
+ *
+ *	Note: the iovec is modified during the copy.
+ */
+int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
+			struct sk_buff *skb, int offset, struct iovec *to,
+			size_t len, struct dma_pinned_list *pinned_list)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	dma_cookie_t cookie = 0;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		cookie = dma_memcpy_to_iovec(chan, to, pinned_list,
+					     skb->data + offset, copy);
+		if (cookie < 0)
+			goto fault;
+		len -= copy;
+		if (len == 0)
+			goto end;
+		offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		copy = end - offset;
+		if (copy > 0) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			struct page *page = frag->page;
+
+			if (copy > len)
+				copy = len;
+
+			cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list, page,
+					frag->page_offset + offset - start, copy);
+			if (cookie < 0)
+				goto fault;
+			len -= copy;
+			if (len == 0)
+				goto end;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			copy = end - offset;
+			if (copy > 0) {
+				if (copy > len)
+					copy = len;
+				cookie = dma_skb_copy_datagram_iovec(chan, list,
+						offset - start, to, copy,
+						pinned_list);
+				if (cookie < 0)
+					goto fault;
+				len -= copy;
+				if (len == 0)
+					goto end;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+
+end:
+	if (!len) {
+		skb->dma_cookie = cookie;
+		return cookie;
+	}
+
+fault:
+	return -EFAULT;
+}
+
+/**
+ *	dma_skb_copy_bits - Copy an skb to a kernel buffer
+ *	@skb - buffer to copy
+ *	@offset - offset in the buffer to start copying from
+ *	@to - kernel buffer to copy to
+ *	@len - amount of data to copy from the skb to the buffer
+ *	@dma_cookie - cookie of the last queued copy, returned to the caller
+ *
+ *	Note: the copy is asynchronous; poll the returned cookie for completion.
+ */
+int dma_skb_copy_bits(struct dma_chan *chan, struct sk_buff *skb,
+			int offset, void *to, int len, dma_cookie_t *dma_cookie)
+{
+	int i, copy;
+	int start = skb_headlen(skb);
+
+	*dma_cookie = 0;
+
+	if (offset > (int)skb->len - len)
+		goto fault;
+
+	/* Copy header. */
+	if ((copy = start - offset) > 0) {
+		if (copy > len)
+			copy = len;
+
+		skb->dma_cookie = *dma_cookie =
+			dma_async_memcpy_buf_to_buf(chan, to, skb->data + offset, copy);
+
+		if (skb->dma_cookie < 0)
+			goto fault;
+
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			u8 *vaddr;
+
+			if (copy > len)
+				copy = len;
+
+			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+
+			skb->dma_cookie = *dma_cookie =
+				dma_async_memcpy_buf_to_buf(chan, to, vaddr +
+					skb_shinfo(skb)->frags[i].page_offset +
+					offset - start, copy);
+
+			if (skb->dma_cookie < 0)
+				goto fault;
+
+			kunmap_skb_frag(vaddr);
+
+			if ((len -= copy) == 0)
+			{
+				return 0;
+			}
+			offset += copy;
+			to += copy;
+		}
+		start = end;
+	}
+
+	if (0 < skb->dma_cookie)
+		*dma_cookie = skb->dma_cookie;
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				if (dma_skb_copy_bits(chan, list, offset - start,
+						      to, copy, dma_cookie))
+					goto fault;
+				if ((len -= copy) == 0)
+					return 0;
+				offset += copy;
+				to += copy;
+			}
+			start = end;
+		}
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+
+EXPORT_SYMBOL(dma_skb_copy_bits);
+
+#endif /* CONFIG_NET_DMA */
diff --git a/net/core/sock.c b/net/core/sock.c
index c519b43..905aaa7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -993,7 +993,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 
 		skb_queue_head_init(&newsk->sk_write_queue);
 #ifdef CONFIG_NET_DMA
 		skb_queue_head_init(&newsk->sk_async_wait_queue);
-#endif
+#endif /* CONFIG_NET_DMA */
 
 		rwlock_init(&newsk->sk_dst_lock);
 		rwlock_init(&newsk->sk_callback_lock);
@@ -1564,7 +1564,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	skb_queue_head_init(&sk->sk_error_queue);
 #ifdef CONFIG_NET_DMA
 	skb_queue_head_init(&sk->sk_async_wait_queue);
-#endif
+#endif /* CONFIG_NET_DMA */
 
 	sk->sk_send_head	=	NULL;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 16e04b7..7add8e2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -811,7 +811,7 @@ ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
 	{
 		.ctl_name	= NET_TCP_DMA_COPYBREAK,
 		.procname	= "tcp_dma_copybreak",
@@ -820,7 +820,7 @@ ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
-#endif
+#endif /* CONFIG_TCP_DMA */
 	{
 		.ctl_name	= NET_TCP_SLOW_START_AFTER_IDLE,
 		.procname	= "tcp_slow_start_after_idle",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8e65182..8d67edb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1142,9 +1142,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
 	tp->ucopy.dma_chan = NULL;
 	preempt_disable();
+	skb = skb_peek_tail(&sk->sk_receive_queue);
 	{
 		int available = 0;
 
@@ -1162,7 +1163,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			preempt_enable_no_resched();
 		}
 	}
-#endif
+#endif /* CONFIG_TCP_DMA */
 
 	do {
 		u32 offset;
@@ -1304,9 +1305,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		} else
 			sk_wait_data(sk, &timeo);
 
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
 		tp->ucopy.wakeup = 0;
-#endif
+#endif /* CONFIG_TCP_DMA */
 
 		if (user_recv) {
 			int chunk;
@@ -1363,7 +1364,7 @@ do_prequeue:
 		}
 
 		if (!(flags & MSG_TRUNC)) {
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
 			if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
 				tp->ucopy.dma_chan = get_softnet_dma();
 
@@ -1386,7 +1387,7 @@ do_prequeue:
 					copied_early = 1;
 
 			} else
-#endif
+#endif /* CONFIG_TCP_DMA */
 			{
 				err = skb_copy_datagram_iovec(skb, offset,
 						msg->msg_iov, used);
@@ -1450,7 +1451,7 @@ skip_copy:
 		tp->ucopy.len = 0;
 	}
 
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
 	if (tp->ucopy.dma_chan) {
 		dma_cookie_t done, used;
 
@@ -1477,7 +1478,7 @@ skip_copy:
 		dma_unpin_iovec_pages(tp->ucopy.pinned_list);
 		tp->ucopy.pinned_list = NULL;
 	}
-#endif
+#endif /* CONFIG_TCP_DMA */
 
 	/* According to UNIX98, msg_name/msg_namelen are ignored
 	 * on connected socket. I was just happy when found this 8) --ANK
@@ -1751,7 +1752,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	__skb_queue_purge(&tp->out_of_order_queue);
 #ifdef CONFIG_NET_DMA
 	__skb_queue_purge(&sk->sk_async_wait_queue);
-#endif
+#endif /* CONFIG_NET_DMA */
 
 	inet->dport = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b39f0d8..91da168 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4376,7 +4376,7 @@ static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
 		__tcp_checksum_complete_user(sk, skb);
 }
 
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
 static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -4418,7 +4418,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen)
 out:
 	return copied_early;
 }
-#endif /* CONFIG_NET_DMA */
+#endif /* CONFIG_TCP_DMA */
 
 /*
  *	TCP receive function for the ESTABLISHED state.
@@ -4538,12 +4538,12 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			if (tp->copied_seq == tp->rcv_nxt &&
 			    len - tcp_header_len <= tp->ucopy.len) {
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
 				if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
 					copied_early = 1;
 					eaten = 1;
 				}
-#endif
+#endif /* CONFIG_TCP_DMA */
 				if (tp->ucopy.task == current &&
 				    sock_owned_by_user(sk) && !copied_early) {
 					__set_current_state(TASK_RUNNING);
@@ -4613,7 +4613,7 @@ no_ack:
 			if (copied_early)
 				__skb_queue_tail(&sk->sk_async_wait_queue, skb);
 			else
-#endif
+#endif /* CONFIG_NET_DMA */
 			if (eaten)
 				__kfree_skb(skb);
 			else
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 652c323..44d3755 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1682,14 +1682,14 @@ process:
 	bh_lock_sock_nested(sk);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
 		struct tcp_sock *tp = tcp_sk(sk);
 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
 			tp->ucopy.dma_chan = get_softnet_dma();
 		if (tp->ucopy.dma_chan)
 			ret = tcp_v4_do_rcv(sk, skb);
 		else
-#endif
+#endif /* CONFIG_TCP_DMA */
 		{
 			if (!tcp_prequeue(sk, skb))
 				ret = tcp_v4_do_rcv(sk, skb);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 93980c3..ec331b5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1732,7 +1732,7 @@ process:
 	bh_lock_sock_nested(sk);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
-#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_TCP_DMA
 		struct tcp_sock *tp = tcp_sk(sk);
 		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
 			tp->ucopy.dma_chan = get_softnet_dma();
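For reviewers wiring up a new client of the library, here is a minimal, hypothetical sketch (not part of the patch above) of how a receive path built on netdma might drive dma_skb_copy_bits(). The function name iscsi_recv_to_buf() and its calling context are invented for illustration; get_softnet_dma(), dma_chan_put(), skb_copy_bits() and the dma_async_memcpy_*() helpers are the existing kernel interfaces of this era, and dma_skb_copy_bits() is the routine added by this patch.

/* Hypothetical example only -- not part of the patch above. */
#include <linux/skbuff.h>
#include <linux/dmaengine.h>
#include <net/netdma.h>

static int iscsi_recv_to_buf(struct sk_buff *skb, int offset,
			     void *buf, int len)
{
	struct dma_chan *chan;
	dma_cookie_t cookie = 0;
	dma_cookie_t done, used;
	int err;

	chan = get_softnet_dma();	/* takes a reference on this CPU's channel */
	if (!chan)
		return skb_copy_bits(skb, offset, buf, len);	/* CPU fallback */

	/* Queue the asynchronous copy; cookie identifies the last descriptor. */
	err = dma_skb_copy_bits(chan, skb, offset, buf, len, &cookie);
	if (err < 0) {
		dma_chan_put(chan);
		return err;
	}

	/* Flush the descriptors to hardware and wait for completion. */
	dma_async_memcpy_issue_pending(chan);
	while (dma_async_memcpy_complete(chan, cookie, &done, &used) ==
	       DMA_IN_PROGRESS)
		cpu_relax();

	dma_chan_put(chan);
	return 0;
}

A real client would normally defer the completion wait until just before the data is handed to its consumer, as tcp_recvmsg() does with tp->ucopy.dma_cookie; the inline busy-wait here only keeps the sketch short.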