Re: [PATCH 8/8] [I/OAT] TCP recv offload to I/OAT
Chris Leech [EMAIL PROTECTED] wrote: Locks down user pages and sets up for DMA in tcp_recvmsg, then calls dma_async_try_early_copy in tcp_v4_do_rcv All those ifdefs are still there. They really do put a maintenance burden on, of all places, net/ipv4/tcp.c. Please find a way of abstracting out the conceptual additions which IOAT makes to TCP and to wrap that into inline functions, etc. See how tidily NF_HOOK() has inserted things into the net stack. Also, for something which is billed as an performance enhancement, benchmark numbers are the key thing which we need to judge the desirability of these changes. What is the plan there? Thanks. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 8/8] [I/OAT] TCP recv offload to I/OAT
Locks down user pages and sets up for DMA in tcp_recvmsg, then calls dma_async_try_early_copy in tcp_v4_do_rcv Signed-off-by: Chris Leech [EMAIL PROTECTED] --- include/net/netdma.h |1 net/ipv4/tcp.c | 110 +- net/ipv4/tcp_input.c | 74 ++ net/ipv4/tcp_ipv4.c | 18 net/ipv6/tcp_ipv6.c | 12 + 5 files changed, 193 insertions(+), 22 deletions(-) diff --git a/include/net/netdma.h b/include/net/netdma.h index feb499f..3d9c222 100644 --- a/include/net/netdma.h +++ b/include/net/netdma.h @@ -38,6 +38,7 @@ static inline struct dma_chan *get_softn int dma_skb_copy_datagram_iovec(struct dma_chan* chan, const struct sk_buff *skb, int offset, struct iovec *to, size_t len, struct dma_pinned_list *pinned_list); +int dma_async_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen); #endif /* CONFIG_NET_DMA */ #endif /* NETDMA_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9122520..a277398 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -262,7 +262,7 @@ #include net/tcp.h #include net/xfrm.h #include net/ip.h - +#include net/netdma.h #include asm/uaccess.h #include asm/ioctls.h @@ -1109,6 +1109,7 @@ int tcp_recvmsg(struct kiocb *iocb, stru int target; /* Read at least this many bytes */ long timeo; struct task_struct *user_recv = NULL; + int copied_early = 0; lock_sock(sk); @@ -1132,6 +1133,12 @@ int tcp_recvmsg(struct kiocb *iocb, stru target = sock_rcvlowat(sk, flags MSG_WAITALL, len); +#ifdef CONFIG_NET_DMA + tp-ucopy.dma_chan = NULL; + if ((len sysctl_tcp_dma_copybreak) !(flags MSG_PEEK) !sysctl_tcp_low_latency __get_cpu_var(softnet_data.net_dma)) + tp-ucopy.pinned_list = dma_pin_iovec_pages(msg-msg_iov, len); +#endif + do { struct sk_buff *skb; u32 offset; @@ -1273,6 +1280,10 @@ int tcp_recvmsg(struct kiocb *iocb, stru } else sk_wait_data(sk, timeo); +#ifdef CONFIG_NET_DMA + tp-ucopy.wakeup = 0; +#endif + if (user_recv) { int chunk; @@ -1328,13 +1339,39 @@ do_prequeue: } if (!(flags MSG_TRUNC)) { - err = skb_copy_datagram_iovec(skb, offset, - msg-msg_iov, used); - if (err) { - /* Exception. Bailout! */ - if (!copied) - copied = -EFAULT; - break; +#ifdef CONFIG_NET_DMA + if (!tp-ucopy.dma_chan tp-ucopy.pinned_list) + tp-ucopy.dma_chan = get_softnet_dma(); + + if (tp-ucopy.dma_chan) { + tp-ucopy.dma_cookie = dma_skb_copy_datagram_iovec( + tp-ucopy.dma_chan, skb, offset, + msg-msg_iov, used, + tp-ucopy.pinned_list); + + if (tp-ucopy.dma_cookie 0) { + + printk(KERN_ALERT dma_cookie 0\n); + + /* Exception. Bailout! */ + if (!copied) + copied = -EFAULT; + break; + } + if ((offset + used) == skb-len) + copied_early = 1; + + } else +#endif + { + err = skb_copy_datagram_iovec(skb, offset, + msg-msg_iov, used); + if (err) { + /* Exception. Bailout! */ + if (!copied) + copied = -EFAULT; + break; + } } } @@ -1354,15 +1391,33 @@ skip_copy: if (skb-h.th-fin) goto found_fin_ok; - if (!(flags MSG_PEEK)) - sk_eat_skb(sk, skb); + if (!(flags MSG_PEEK)) { + if (!copied_early) + sk_eat_skb(sk, skb); +#ifdef CONFIG_NET_DMA + else { + __skb_unlink(skb, sk-sk_receive_queue); + __skb_queue_tail(sk-sk_async_wait_queue, skb); + copied_early = 0; + } +#endif + }
Re: [PATCH 8/8] [I/OAT] TCP recv offload to I/OAT
Chris Leech [EMAIL PROTECTED] wrote: +#ifdef CONFIG_NET_DMA +tp-ucopy.dma_chan = NULL; +if ((len sysctl_tcp_dma_copybreak) !(flags MSG_PEEK) !sysctl_tcp_low_latency __get_cpu_var(softnet_data.net_dma)) +dma_lock_iovec_pages(msg-msg_iov, len, tp-ucopy.locked_list); +#endif The __get_cpu_var() here will run smp_processor_id() from preemptible context. You'll get a big warning if the correct debug options are set. The reason for this is that preemption could cause this code to hop between CPUs. Please always test code with all debug options enabled and with full kernel preemption. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 8/8] [I/OAT] TCP recv offload to I/OAT
From: Andrew Morton [EMAIL PROTECTED] Date: Sun, 5 Mar 2006 00:45:34 -0800 The __get_cpu_var() here will run smp_processor_id() from preemptible context. You'll get a big warning if the correct debug options are set. The reason for this is that preemption could cause this code to hop between CPUs. Please always test code with all debug options enabled and with full kernel preemption. To be fair that warning doesn't trigger on some platforms, such as sparc64 where the __get_cpu_var() implementation simply takes the value from a fixed cpu register and doesn't do the debugging check. Sparc64 should add the check when debugging options are enabled, for sure, but the point is that it may not entirely be the tester's fault. :-) - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 8/8] [I/OAT] TCP recv offload to I/OAT
Hi! --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -262,6 +262,9 @@ #include net/tcp.h #include net/xfrm.h #include net/ip.h +#ifdef CONFIG_NET_DMA +#include net/netdma.h +#endif Remove the ifdefs, move them inside .h if needed. diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7625eaf..9b6290d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -71,6 +71,9 @@ #include net/inet_common.h #include linux/ipsec.h #include asm/unaligned.h +#ifdef CONFIG_NET_DMA +#include net/netdma.h +#endif Here, too. +#ifdef CONFIG_NET_DMA + if (copied_early) + __skb_queue_tail(sk-sk_async_wait_queue, skb); + else +#endif if (eaten) __kfree_skb(skb); else Could you #define copied_early to 0 and avoid ifdefs? @@ -1091,8 +1094,18 @@ process: bh_lock_sock(sk); ret = 0; if (!sock_owned_by_user(sk)) { - if (!tcp_prequeue(sk, skb)) +#ifdef CONFIG_NET_DMA + struct tcp_sock *tp = tcp_sk(sk); + if (!tp-ucopy.dma_chan tp-ucopy.locked_list) + tp-ucopy.dma_chan = get_softnet_dma(); + if (tp-ucopy.dma_chan) + ret = tcp_v4_do_rcv(sk, skb); + else +#endif + { + if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); + } } else Wrong indentation... Pavel -- Thanks, Sharp! - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 8/8] [I/OAT] TCP recv offload to I/OAT
On Fri, Mar 03, 2006 at 01:42:36PM -0800, Chris Leech wrote: diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 13abfa2..b792048 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -262,6 +262,9 @@ #include net/tcp.h #include net/xfrm.h #include net/ip.h +#ifdef CONFIG_NET_DMA +#include net/netdma.h +#endif #ifdef is not needed here (try not to put #ifdef in .c files.) I think a few of your other usages of #ifdef in this file can also be removed with judicious use of inline functions in a .h file. thanks, greg k-h - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 8/8] [I/OAT] TCP recv offload to I/OAT
Chris Leech [EMAIL PROTECTED] wrote: Locks down user pages and sets up for DMA in tcp_recvmsg, then calls dma_async_try_early_copy in tcp_v4_do_rcv +#ifdef CONFIG_NET_DMA +#ifdef CONFIG_NET_DMA +#ifdef CONFIG_NET_DMA +#ifdef CONFIG_NET_DMA +#ifdef CONFIG_NET_DMA +#ifdef CONFIG_NET_DMA +#ifdef CONFIG_NET_DMA +#ifdef CONFIG_NET_DMA +#ifdef CONFIG_NET_DMA +#ifdef CONFIG_NET_DMA +#ifdef CONFIG_NET_DMA +#ifdef CONFIG_NET_DMA +#ifdef CONFIG_NET_DMA +#ifdef CONFIG_NET_DMA +#ifdef CONFIG_NET_DMA +#ifdef CONFIG_NET_DMA waaay too many ifdefs. There are various tricks we use to minimise them. +#ifdef CONFIG_NET_DMA + tp-ucopy.dma_chan = NULL; + if ((len sysctl_tcp_dma_copybreak) !(flags MSG_PEEK) !sysctl_tcp_low_latency __get_cpu_var(softnet_data.net_dma)) + dma_lock_iovec_pages(msg-msg_iov, len, tp-ucopy.locked_list); +#endif Please try to fit code into 80 columns. That's decimal 80 ;) @@ -1328,13 +1342,39 @@ do_prequeue: } if (!(flags MSG_TRUNC)) { - err = skb_copy_datagram_iovec(skb, offset, - msg-msg_iov, used); - if (err) { - /* Exception. Bailout! */ - if (!copied) - copied = -EFAULT; - break; +#ifdef CONFIG_NET_DMA + if (!tp-ucopy.dma_chan tp-ucopy.locked_list) + tp-ucopy.dma_chan = get_softnet_dma(); + + if (tp-ucopy.dma_chan) { + tp-ucopy.dma_cookie = dma_skb_copy_datagram_iovec( + tp-ucopy.dma_chan, skb, offset, + msg-msg_iov, used, + tp-ucopy.locked_list); + + if (tp-ucopy.dma_cookie 0) { + + printk(KERN_ALERT dma_cookie 0\n); + + /* Exception. Bailout! */ + if (!copied) + copied = -EFAULT; + break; + } + if ((offset + used) == skb-len) + copied_early = 1; + Consider trimming some of those blank lines. I don't think they add any value? + } else +#endif + { These games with ifdefs and else statements aren't at all pleasant. Sometimes they're hard to avoid, but you'll probably find that some code rearrangemnt (in a preceding patch) makes it easier. Like, split this function into several. @@ -1354,15 +1394,33 @@ skip_copy: if (skb-h.th-fin) goto found_fin_ok; - if (!(flags MSG_PEEK)) - sk_eat_skb(sk, skb); + if (!(flags MSG_PEEK)) { + if (!copied_early) + sk_eat_skb(sk, skb); +#ifdef CONFIG_NET_DMA + else { + __skb_unlink(skb, sk-sk_receive_queue); + __skb_queue_tail(sk-sk_async_wait_queue, skb); + copied_early = 0; + } +#endif ... - sk_eat_skb(sk, skb); + if (!(flags MSG_PEEK)) { + if (!copied_early) + sk_eat_skb(sk, skb); +#ifdef CONFIG_NET_DMA + else { + __skb_unlink(skb, sk-sk_receive_queue); + __skb_queue_tail(sk-sk_async_wait_queue, skb); + copied_early = 0; + } +#endif + } etc. +#ifdef CONFIG_NET_DMA + if (copied_early) + __skb_queue_tail(sk-sk_async_wait_queue, skb); + else +#endif if (eaten) __kfree_skb(skb); else etc. @@ -4049,6 +4067,52 @@ discard: return 0; } +#ifdef CONFIG_NET_DMA +int dma_async_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen) +{ + struct tcp_sock *tp = tcp_sk(sk); + int chunk = skb-len - hlen; + int dma_cookie; + int copied_early = 0; + + if (tp-ucopy.wakeup) + goto out; In this case a simple return 0; would be fine. We haven't done anything yet. +#ifdef CONFIG_NET_DMA + struct tcp_sock *tp = tcp_sk(sk); + if (!tp-ucopy.dma_chan tp-ucopy.locked_list) + tp-ucopy.dma_chan = get_softnet_dma(); + if (tp-ucopy.dma_chan) + ret = tcp_v4_do_rcv(sk, skb); + else +#endif + { +