Re: [PATCH 8/8] [I/OAT] TCP recv offload to I/OAT

2006-03-11 Thread Andrew Morton
Chris Leech [EMAIL PROTECTED] wrote:

 Locks down user pages and sets up for DMA in tcp_recvmsg, then calls
  dma_async_try_early_copy in tcp_v4_do_rcv

All those ifdefs are still there.  They really do put a maintenance burden
on, of all places, net/ipv4/tcp.c.  Please find a way of abstracting out
the conceptual additions which IOAT makes to TCP and to wrap that into
inline functions, etc.  See how tidily NF_HOOK() has inserted things into
the net stack.

Also, for something which is billed as an performance enhancement,
benchmark numbers are the key thing which we need to judge the desirability
of these changes.  What is the plan there?

Thanks.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 8/8] [I/OAT] TCP recv offload to I/OAT

2006-03-10 Thread Chris Leech
Locks down user pages and sets up for DMA in tcp_recvmsg, then calls
dma_async_try_early_copy in tcp_v4_do_rcv

Signed-off-by: Chris Leech [EMAIL PROTECTED]
---

 include/net/netdma.h |1 
 net/ipv4/tcp.c   |  110 +-
 net/ipv4/tcp_input.c |   74 ++
 net/ipv4/tcp_ipv4.c  |   18 
 net/ipv6/tcp_ipv6.c  |   12 +
 5 files changed, 193 insertions(+), 22 deletions(-)

diff --git a/include/net/netdma.h b/include/net/netdma.h
index feb499f..3d9c222 100644
--- a/include/net/netdma.h
+++ b/include/net/netdma.h
@@ -38,6 +38,7 @@ static inline struct dma_chan *get_softn
 int dma_skb_copy_datagram_iovec(struct dma_chan* chan,
const struct sk_buff *skb, int offset, struct iovec *to,
size_t len, struct dma_pinned_list *pinned_list);
+int dma_async_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen);
 
 #endif /* CONFIG_NET_DMA */
 #endif /* NETDMA_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9122520..a277398 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -262,7 +262,7 @@
 #include net/tcp.h
 #include net/xfrm.h
 #include net/ip.h
-
+#include net/netdma.h
 
 #include asm/uaccess.h
 #include asm/ioctls.h
@@ -1109,6 +1109,7 @@ int tcp_recvmsg(struct kiocb *iocb, stru
int target; /* Read at least this many bytes */
long timeo;
struct task_struct *user_recv = NULL;
+   int copied_early = 0;
 
lock_sock(sk);
 
@@ -1132,6 +1133,12 @@ int tcp_recvmsg(struct kiocb *iocb, stru
 
target = sock_rcvlowat(sk, flags  MSG_WAITALL, len);
 
+#ifdef CONFIG_NET_DMA
+   tp-ucopy.dma_chan = NULL;
+   if ((len  sysctl_tcp_dma_copybreak)  !(flags  MSG_PEEK)  
!sysctl_tcp_low_latency  __get_cpu_var(softnet_data.net_dma))
+   tp-ucopy.pinned_list = dma_pin_iovec_pages(msg-msg_iov, len);
+#endif
+
do {
struct sk_buff *skb;
u32 offset;
@@ -1273,6 +1280,10 @@ int tcp_recvmsg(struct kiocb *iocb, stru
} else
sk_wait_data(sk, timeo);
 
+#ifdef CONFIG_NET_DMA
+   tp-ucopy.wakeup = 0;
+#endif
+
if (user_recv) {
int chunk;
 
@@ -1328,13 +1339,39 @@ do_prequeue:
}
 
if (!(flags  MSG_TRUNC)) {
-   err = skb_copy_datagram_iovec(skb, offset,
- msg-msg_iov, used);
-   if (err) {
-   /* Exception. Bailout! */
-   if (!copied)
-   copied = -EFAULT;
-   break;
+#ifdef CONFIG_NET_DMA
+   if (!tp-ucopy.dma_chan  tp-ucopy.pinned_list)
+   tp-ucopy.dma_chan = get_softnet_dma();
+
+   if (tp-ucopy.dma_chan) {
+   tp-ucopy.dma_cookie = 
dma_skb_copy_datagram_iovec(
+   tp-ucopy.dma_chan, skb, offset,
+   msg-msg_iov, used,
+   tp-ucopy.pinned_list);
+
+   if (tp-ucopy.dma_cookie  0) {
+
+   printk(KERN_ALERT dma_cookie  0\n);
+
+   /* Exception. Bailout! */
+   if (!copied)
+   copied = -EFAULT;
+   break;
+   }
+   if ((offset + used) == skb-len)
+   copied_early = 1;
+
+   } else
+#endif
+   {
+   err = skb_copy_datagram_iovec(skb, offset,
+   msg-msg_iov, used);
+   if (err) {
+   /* Exception. Bailout! */
+   if (!copied)
+   copied = -EFAULT;
+   break;
+   }
}
}
 
@@ -1354,15 +1391,33 @@ skip_copy:
 
if (skb-h.th-fin)
goto found_fin_ok;
-   if (!(flags  MSG_PEEK))
-   sk_eat_skb(sk, skb);
+   if (!(flags  MSG_PEEK)) {
+   if (!copied_early)
+   sk_eat_skb(sk, skb);
+#ifdef CONFIG_NET_DMA
+   else {
+   __skb_unlink(skb, sk-sk_receive_queue);
+   __skb_queue_tail(sk-sk_async_wait_queue, skb);
+   copied_early = 0;
+   }
+#endif
+   }
   

Re: [PATCH 8/8] [I/OAT] TCP recv offload to I/OAT

2006-03-05 Thread Andrew Morton
Chris Leech [EMAIL PROTECTED] wrote:

 +#ifdef CONFIG_NET_DMA
  +tp-ucopy.dma_chan = NULL;
  +if ((len  sysctl_tcp_dma_copybreak)  !(flags  MSG_PEEK)  
 !sysctl_tcp_low_latency  __get_cpu_var(softnet_data.net_dma))
  +dma_lock_iovec_pages(msg-msg_iov, len, tp-ucopy.locked_list);
  +#endif

The __get_cpu_var() here will run smp_processor_id() from preemptible
context.  You'll get a big warning if the correct debug options are set.

The reason for this is that preemption could cause this code to hop between
CPUs.

Please always test code with all debug options enabled and with full kernel
preemption.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 8/8] [I/OAT] TCP recv offload to I/OAT

2006-03-05 Thread David S. Miller
From: Andrew Morton [EMAIL PROTECTED]
Date: Sun, 5 Mar 2006 00:45:34 -0800

 The __get_cpu_var() here will run smp_processor_id() from preemptible
 context.  You'll get a big warning if the correct debug options are set.
 
 The reason for this is that preemption could cause this code to hop between
 CPUs.
 
 Please always test code with all debug options enabled and with full kernel
 preemption.

To be fair that warning doesn't trigger on some platforms, such as
sparc64 where the __get_cpu_var() implementation simply takes the
value from a fixed cpu register and doesn't do the debugging check.

Sparc64 should add the check when debugging options are enabled, for
sure, but the point is that it may not entirely be the tester's fault.
:-)
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 8/8] [I/OAT] TCP recv offload to I/OAT

2006-03-05 Thread Pavel Machek
Hi!

 --- a/net/ipv4/tcp.c
 +++ b/net/ipv4/tcp.c
 @@ -262,6 +262,9 @@
  #include net/tcp.h
  #include net/xfrm.h
  #include net/ip.h
 +#ifdef CONFIG_NET_DMA
 +#include net/netdma.h
 +#endif
  

Remove the ifdefs, move them inside .h if needed.

 diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
 index 7625eaf..9b6290d 100644
 --- a/net/ipv4/tcp_input.c
 +++ b/net/ipv4/tcp_input.c
 @@ -71,6 +71,9 @@
  #include net/inet_common.h
  #include linux/ipsec.h
  #include asm/unaligned.h
 +#ifdef CONFIG_NET_DMA
 +#include net/netdma.h
 +#endif

Here, too.

 +#ifdef CONFIG_NET_DMA
 + if (copied_early)
 + __skb_queue_tail(sk-sk_async_wait_queue, skb);
 + else
 +#endif
   if (eaten)
   __kfree_skb(skb);
   else

Could you #define copied_early to 0 and avoid ifdefs?

 @@ -1091,8 +1094,18 @@ process:
   bh_lock_sock(sk);
   ret = 0;
   if (!sock_owned_by_user(sk)) {
 - if (!tcp_prequeue(sk, skb))
 +#ifdef CONFIG_NET_DMA
 + struct tcp_sock *tp = tcp_sk(sk);
 + if (!tp-ucopy.dma_chan  tp-ucopy.locked_list)
 + tp-ucopy.dma_chan = get_softnet_dma();
 + if (tp-ucopy.dma_chan)
 + ret = tcp_v4_do_rcv(sk, skb);
 + else
 +#endif
 + {
 + if (!tcp_prequeue(sk, skb))
   ret = tcp_v4_do_rcv(sk, skb);
 + }
   } else

Wrong indentation...
Pavel
-- 
Thanks, Sharp!
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 8/8] [I/OAT] TCP recv offload to I/OAT

2006-03-04 Thread Greg KH
On Fri, Mar 03, 2006 at 01:42:36PM -0800, Chris Leech wrote:
 diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
 index 13abfa2..b792048 100644
 --- a/net/ipv4/tcp.c
 +++ b/net/ipv4/tcp.c
 @@ -262,6 +262,9 @@
  #include net/tcp.h
  #include net/xfrm.h
  #include net/ip.h
 +#ifdef CONFIG_NET_DMA
 +#include net/netdma.h
 +#endif

#ifdef is not needed here (try not to put #ifdef in .c files.)  I think
a few of your other usages of #ifdef in this file can also be removed
with judicious use of inline functions in a .h file.

thanks,

greg k-h
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 8/8] [I/OAT] TCP recv offload to I/OAT

2006-03-04 Thread Andrew Morton
Chris Leech [EMAIL PROTECTED] wrote:

 Locks down user pages and sets up for DMA in tcp_recvmsg, then calls
 dma_async_try_early_copy in tcp_v4_do_rcv
 

+#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_NET_DMA
+#ifdef CONFIG_NET_DMA

waaay too many ifdefs.   There are various tricks we use to minimise them.

 +#ifdef CONFIG_NET_DMA
 + tp-ucopy.dma_chan = NULL;
 + if ((len  sysctl_tcp_dma_copybreak)  !(flags  MSG_PEEK)  
 !sysctl_tcp_low_latency  __get_cpu_var(softnet_data.net_dma))
 + dma_lock_iovec_pages(msg-msg_iov, len, tp-ucopy.locked_list);
 +#endif

Please try to fit code into 80 columns.

That's decimal 80 ;)

 @@ -1328,13 +1342,39 @@ do_prequeue:
   }
  
   if (!(flags  MSG_TRUNC)) {
 - err = skb_copy_datagram_iovec(skb, offset,
 -   msg-msg_iov, used);
 - if (err) {
 - /* Exception. Bailout! */
 - if (!copied)
 - copied = -EFAULT;
 - break;
 +#ifdef CONFIG_NET_DMA
 + if (!tp-ucopy.dma_chan  tp-ucopy.locked_list)
 + tp-ucopy.dma_chan = get_softnet_dma();
 +
 + if (tp-ucopy.dma_chan) {
 + tp-ucopy.dma_cookie = 
 dma_skb_copy_datagram_iovec(
 + tp-ucopy.dma_chan, skb, offset,
 + msg-msg_iov, used,
 + tp-ucopy.locked_list);
 +
 + if (tp-ucopy.dma_cookie  0) {
 +
 + printk(KERN_ALERT dma_cookie  0\n);
 +
 + /* Exception. Bailout! */
 + if (!copied)
 + copied = -EFAULT;
 + break;
 + }
 + if ((offset + used) == skb-len)
 + copied_early = 1;
 +

Consider trimming some of those blank lines.  I don't think they add any
value?

 + } else
 +#endif
 + {

These games with ifdefs and else statements aren't at all pleasant. 
Sometimes they're hard to avoid, but you'll probably find that some code
rearrangemnt (in a preceding patch) makes it easier.  Like, split this
function into several.

 @@ -1354,15 +1394,33 @@ skip_copy:
  
   if (skb-h.th-fin)
   goto found_fin_ok;
 - if (!(flags  MSG_PEEK))
 - sk_eat_skb(sk, skb);
 + if (!(flags  MSG_PEEK)) {
 + if (!copied_early)
 + sk_eat_skb(sk, skb);
 +#ifdef CONFIG_NET_DMA
 + else {
 + __skb_unlink(skb, sk-sk_receive_queue);
 + __skb_queue_tail(sk-sk_async_wait_queue, skb);
 + copied_early = 0;
 + }
 +#endif
 ...
 - sk_eat_skb(sk, skb);
 + if (!(flags  MSG_PEEK)) {
 + if (!copied_early)
 + sk_eat_skb(sk, skb);
 +#ifdef CONFIG_NET_DMA
 + else {
 + __skb_unlink(skb, sk-sk_receive_queue);
 + __skb_queue_tail(sk-sk_async_wait_queue, skb);
 + copied_early = 0;
 + }
 +#endif
 + }

etc.

 +#ifdef CONFIG_NET_DMA
 + if (copied_early)
 + __skb_queue_tail(sk-sk_async_wait_queue, skb);
 + else
 +#endif
   if (eaten)
   __kfree_skb(skb);
   else

etc.

 @@ -4049,6 +4067,52 @@ discard:
   return 0;
  }
  
 +#ifdef CONFIG_NET_DMA
 +int dma_async_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen)
 +{
 + struct tcp_sock *tp = tcp_sk(sk);
 + int chunk = skb-len - hlen;
 + int dma_cookie;
 + int copied_early = 0;
 +
 + if (tp-ucopy.wakeup)
 + goto out;

In this case a simple

return 0;

would be fine.  We haven't done anything yet.

 +#ifdef CONFIG_NET_DMA
 + struct tcp_sock *tp = tcp_sk(sk);
 + if (!tp-ucopy.dma_chan  tp-ucopy.locked_list)
 + tp-ucopy.dma_chan = get_softnet_dma();
 + if (tp-ucopy.dma_chan)
 + ret = tcp_v4_do_rcv(sk, skb);
 + else
 +#endif
 + {
 +