Re: [PATCH v3 net-next 2/2] selftests: net: tcp_mmap must use TCP_ZEROCOPY_RECEIVE

2018-04-26 Thread Soheil Hassas Yeganeh
On Thu, Apr 26, 2018 at 10:50 AM, Eric Dumazet  wrote:
> After prior kernel change, mmap() on TCP socket only reserves VMA.
>
> We have to use getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...)
> to perform the transfer of pages from skbs in TCP receive queue into such
> VMA.
>
> struct tcp_zerocopy_receive {
> __u64 address;  /* in: address of mapping */
> __u32 length;   /* in/out: number of bytes to map/mapped */
> __u32 recv_skip_hint;   /* out: amount of bytes to skip */
> };
>
> After a successful getsockopt(...TCP_ZEROCOPY_RECEIVE...), @length contains
> number of bytes that were mapped, and @recv_skip_hint contains number of bytes
> that should be read using conventional read()/recv()/recvmsg() system calls,
> to skip a sequence of bytes that can not be mapped, because not properly page
> aligned.
>
> Signed-off-by: Eric Dumazet 
> Cc: Andy Lutomirski 
> Cc: Soheil Hassas Yeganeh 

Acked-by: Soheil Hassas Yeganeh 

Thank you, again!

> ---
>  tools/testing/selftests/net/tcp_mmap.c | 64 +++---
>  1 file changed, 37 insertions(+), 27 deletions(-)
>
> diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
> index dea342fe6f4e88b5709d2ac37b2fc9a2a320bf44..77f762780199ff1f69f9f6b3f18e72deddb69f5e 100644
> --- a/tools/testing/selftests/net/tcp_mmap.c
> +++ b/tools/testing/selftests/net/tcp_mmap.c
> @@ -76,9 +76,10 @@
>  #include 
>  #include 
>  #include 
> -#include 
>  #include 
>  #include 
> +#include 
> +#include 
>
>  #ifndef MSG_ZEROCOPY
>  #define MSG_ZEROCOPY    0x4000000
> @@ -134,11 +135,12 @@ void hash_zone(void *zone, unsigned int length)
>  void *child_thread(void *arg)
>  {
> unsigned long total_mmap = 0, total = 0;
> +   struct tcp_zerocopy_receive zc;
> unsigned long delta_usec;
> int flags = MAP_SHARED;
> struct timeval t0, t1;
> char *buffer = NULL;
> -   void *oaddr = NULL;
> +   void *addr = NULL;
> double throughput;
> struct rusage ru;
> int lu, fd;
> @@ -153,41 +155,46 @@ void *child_thread(void *arg)
> perror("malloc");
> goto error;
> }
> +   if (zflg) {
> +   addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0);
> +   if (addr == (void *)-1)
> +   zflg = 0;
> +   }
> while (1) {
> struct pollfd pfd = { .fd = fd, .events = POLLIN, };
> int sub;
>
> poll(&pfd, 1, 1);
> if (zflg) {
> -   void *naddr;
> +   socklen_t zc_len = sizeof(zc);
> +   int res;
>
> -   naddr = mmap(oaddr, chunk_size, PROT_READ, flags, fd, 0);
> -   if (naddr == (void *)-1) {
> -   if (errno == EAGAIN) {
> -   /* That is if SO_RCVLOWAT is buggy */
> -   usleep(1000);
> -   continue;
> -   }
> -   if (errno == EINVAL) {
> -   flags = MAP_SHARED;
> -   oaddr = NULL;
> -   goto fallback;
> -   }
> -   if (errno != EIO)
> -   perror("mmap()");
> +   zc.address = (__u64)addr;
> +   zc.length = chunk_size;
> +   zc.recv_skip_hint = 0;
> +   res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
> +                    &zc, &zc_len);
> +   if (res == -1)
> break;
> +
> +   if (zc.length) {
> +   assert(zc.length <= chunk_size);
> +   total_mmap += zc.length;
> +   if (xflg)
> +   hash_zone(addr, zc.length);
> +   total += zc.length;
> }
> -   total_mmap += chunk_size;
> -   if (xflg)
> -   hash_zone(naddr, chunk_size);
> -   total += chunk_size;
> -   if (!keepflag) {
> -   flags |= MAP_FIXED;
> -   oaddr = naddr;
> +   if (zc.recv_skip_hint) {
> +   assert(zc.recv_skip_hint <= chunk_size);
> +   lu = read(fd, buffer, zc.recv_skip_hint);
> +   if (lu > 0) {
> + 

[PATCH v3 net-next 2/2] selftests: net: tcp_mmap must use TCP_ZEROCOPY_RECEIVE

2018-04-26 Thread Eric Dumazet
After prior kernel change, mmap() on TCP socket only reserves VMA.

We have to use getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...)
to perform the transfer of pages from skbs in TCP receive queue into such VMA.

struct tcp_zerocopy_receive {
__u64 address;  /* in: address of mapping */
__u32 length;   /* in/out: number of bytes to map/mapped */
__u32 recv_skip_hint;   /* out: amount of bytes to skip */
};

After a successful getsockopt(...TCP_ZEROCOPY_RECEIVE...), @length contains
number of bytes that were mapped, and @recv_skip_hint contains number of bytes
that should be read using conventional read()/recv()/recvmsg() system calls,
to skip a sequence of bytes that can not be mapped, because not properly page
aligned.

Signed-off-by: Eric Dumazet 
Cc: Andy Lutomirski 
Cc: Soheil Hassas Yeganeh 
---
 tools/testing/selftests/net/tcp_mmap.c | 64 +++---
 1 file changed, 37 insertions(+), 27 deletions(-)

diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
index dea342fe6f4e88b5709d2ac37b2fc9a2a320bf44..77f762780199ff1f69f9f6b3f18e72deddb69f5e 100644
--- a/tools/testing/selftests/net/tcp_mmap.c
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -76,9 +76,10 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
+#include 
+#include 
 
 #ifndef MSG_ZEROCOPY
 #define MSG_ZEROCOPY    0x4000000
@@ -134,11 +135,12 @@ void hash_zone(void *zone, unsigned int length)
 void *child_thread(void *arg)
 {
unsigned long total_mmap = 0, total = 0;
+   struct tcp_zerocopy_receive zc;
unsigned long delta_usec;
int flags = MAP_SHARED;
struct timeval t0, t1;
char *buffer = NULL;
-   void *oaddr = NULL;
+   void *addr = NULL;
double throughput;
struct rusage ru;
int lu, fd;
@@ -153,41 +155,46 @@ void *child_thread(void *arg)
perror("malloc");
goto error;
}
+   if (zflg) {
+   addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0);
+   if (addr == (void *)-1)
+   zflg = 0;
+   }
while (1) {
struct pollfd pfd = { .fd = fd, .events = POLLIN, };
int sub;
 
poll(&pfd, 1, 1);
if (zflg) {
-   void *naddr;
+   socklen_t zc_len = sizeof(zc);
+   int res;
 
-   naddr = mmap(oaddr, chunk_size, PROT_READ, flags, fd, 0);
-   if (naddr == (void *)-1) {
-   if (errno == EAGAIN) {
-   /* That is if SO_RCVLOWAT is buggy */
-   usleep(1000);
-   continue;
-   }
-   if (errno == EINVAL) {
-   flags = MAP_SHARED;
-   oaddr = NULL;
-   goto fallback;
-   }
-   if (errno != EIO)
-   perror("mmap()");
+   zc.address = (__u64)addr;
+   zc.length = chunk_size;
+   zc.recv_skip_hint = 0;
+   res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
+                    &zc, &zc_len);
+   if (res == -1)
break;
+
+   if (zc.length) {
+   assert(zc.length <= chunk_size);
+   total_mmap += zc.length;
+   if (xflg)
+   hash_zone(addr, zc.length);
+   total += zc.length;
}
-   total_mmap += chunk_size;
-   if (xflg)
-   hash_zone(naddr, chunk_size);
-   total += chunk_size;
-   if (!keepflag) {
-   flags |= MAP_FIXED;
-   oaddr = naddr;
+   if (zc.recv_skip_hint) {
+   assert(zc.recv_skip_hint <= chunk_size);
+   lu = read(fd, buffer, zc.recv_skip_hint);
+   if (lu > 0) {
+   if (xflg)
+   hash_zone(buffer, lu);
+   total += lu;
+   }
}
continue;
}
-fallback:
sub = 0;
while (sub < chunk_size) {
lu = read(fd, buffer