> On 31 Oct 2014, at 07:10, Alexander Bluhm <[email protected]> wrote:
>
> Hi,
>
> Some performance measurements showed that socket splicing for TCP
> can be made faster. The main slowdown was that tcp_output() got
> called for every incoming packet. When copying through user-land
> this cannot happen as the scheduler gets involved.
so without splicing, the payloads from multiple tcp packets (at least all of
the ones in a single softnet run?) get bundled up into a buffer that userland
reads and then writes out again in a single go. right?
you're suggesting the taskq as a way to defer output till after the current
softnet call has processed all its packets and queued all the tcp packet
payloads onto the socket?
>
> So my idea is to do the socket splicing for TCP in a special kernel
> thread. One drawback might be that the struct socket gets larger.
> On amd64 that is from 472 to 520 bytes. I could try to put the
> splicing fields into a separate struct that gets only allocated
> when needed.
it's worth remembering there are other memory costs too. i think a kthread (the
thing taskqs run on) is 5 pages on amd64, so 20KB.
> Does someone want to do some performance measurements with relayd?
>
> ok?
>
> bluhm
>
> Index: sys/kern/uipc_socket.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.133
> diff -u -p -u -p -r1.133 uipc_socket.c
> --- sys/kern/uipc_socket.c 9 Sep 2014 02:07:17 -0000 1.133
> +++ sys/kern/uipc_socket.c 30 Oct 2014 18:56:28 -0000
> @@ -56,6 +56,7 @@ void sbsync(struct sockbuf *, struct mbu
> int sosplice(struct socket *, int, off_t, struct timeval *);
> void sounsplice(struct socket *, struct socket *, int);
> void soidle(void *);
> +void sotask(void *, void *);
> int somove(struct socket *, int);
>
> void filt_sordetach(struct knote *kn);
> @@ -80,12 +81,18 @@ int somaxconn = SOMAXCONN;
> int sominconn = SOMINCONN;
>
> struct pool socket_pool;
> +#ifdef SOCKET_SPLICE
> +struct taskq *sosplice_taskq;
> +#endif
>
> void
> soinit(void)
> {
>
> pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL);
> +#ifdef SOCKET_SPLICE
> + sosplice_taskq = taskq_create("sosplice", 1, IPL_SOFTNET);
> +#endif
> }
>
> /*
> @@ -1101,6 +1108,7 @@ sosplice(struct socket *so, int fd, off_
> else
> timerclear(&so->so_idletv);
> timeout_set(&so->so_idleto, soidle, so);
> + task_set(&so->so_splicetask, sotask, so, NULL);
>
> /*
> * To prevent softnet interrupt from calling somove() while
> @@ -1124,6 +1132,7 @@ sounsplice(struct socket *so, struct soc
> {
> splsoftassert(IPL_SOFTNET);
>
> + task_del(sosplice_taskq, &so->so_splicetask);
> timeout_del(&so->so_idleto);
> sosp->so_snd.sb_flagsintr &= ~SB_SPLICE;
> so->so_rcv.sb_flagsintr &= ~SB_SPLICE;
> @@ -1139,13 +1148,34 @@ soidle(void *arg)
> int s;
>
> s = splsoftnet();
> - if (so->so_splice) {
> + if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
> so->so_error = ETIMEDOUT;
> sounsplice(so, so->so_splice, 1);
> }
> splx(s);
> }
>
> +void
> +sotask(void *arg1, void *arg2)
> +{
> + struct socket *so = arg1;
> + int s;
> +
> + s = splsoftnet();
> + if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
> + /*
> + * We may not sleep here as sofree() and unsplice() may be
> + * called from softnet interrupt context. This would remove
> + * the socket during somove().
> + */
> + somove(so, M_DONTWAIT);
> + }
> + splx(s);
> +
> + /* Avoid user land starvation. */
> + yield();
> +}
> +
> /*
> * Move data from receive buffer of spliced source socket to send
> * buffer of drain socket. Try to move as much as possible in one
> @@ -1414,8 +1444,20 @@ void
> sorwakeup(struct socket *so)
> {
> #ifdef SOCKET_SPLICE
> - if (so->so_rcv.sb_flagsintr & SB_SPLICE)
> - (void) somove(so, M_DONTWAIT);
> + if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
> + /*
> + * TCP has a sendbuffer that can handle multiple packets
> + * at once. So queue the stream a bit to accumulate data.
> + * The sosplice thread will call somove() later and send
> + * the packets calling tcp_output() only once.
> + * In the UDP case, send out the packets immediately.
> + * Using a thread would make things slower.
> + */
> + if (so->so_proto->pr_flags & PR_WANTRCVD)
> + task_add(sosplice_taskq, &so->so_splicetask);
> + else
> + somove(so, M_DONTWAIT);
> + }
> if (so->so_splice)
> return;
> #endif
> @@ -1429,7 +1471,7 @@ sowwakeup(struct socket *so)
> {
> #ifdef SOCKET_SPLICE
> if (so->so_snd.sb_flagsintr & SB_SPLICE)
> - (void) somove(so->so_spliceback, M_DONTWAIT);
> + task_add(sosplice_taskq, &so->so_spliceback->so_splicetask);
> #endif
> sowakeup(so, &so->so_snd);
> }
> Index: sys/sys/socketvar.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v
> retrieving revision 1.56
> diff -u -p -u -p -r1.56 socketvar.h
> --- sys/sys/socketvar.h 9 Sep 2014 02:07:17 -0000 1.56
> +++ sys/sys/socketvar.h 30 Oct 2014 18:32:16 -0000
> @@ -34,6 +34,7 @@
>
> #include <sys/selinfo.h> /* for struct selinfo */
> #include <sys/queue.h>
> +#include <sys/task.h>
> #include <sys/timeout.h>
>
> #ifndef _SOCKLEN_T_DEFINED_
> @@ -88,6 +89,7 @@ struct socket {
> off_t so_splicemax; /* maximum number of bytes to splice */
> struct timeval so_idletv; /* idle timeout */
> struct timeout so_idleto;
> + struct task so_splicetask; /* task for somove */
> /*
> * Variables for socket buffering.
> */
>