> On 31 Oct 2014, at 22:37, Alexander Bluhm <alexander.bl...@gmx.net> wrote: > > On Fri, Oct 31, 2014 at 02:50:00PM +1000, David Gwynne wrote: >> so without splicing, the payloads from multiple tcp packets (at least all of >> the ones in a single softnet run?) get bundled up into a buffer that >> userland reads and then writes out again in a single go. right? >> >> you're suggesting the taskq as a way to defer output till after the current >> softnet call has processed all its packets and queued all the tcp packet >> payloads onto the socket? > > Exactly. > >> it's worth remembering there are other memory costs too. I think a kthread >> (the thing taskqs run on) is 5 pages on amd64, so 20KB. > > We can delay the creation of the sosplice thread until user-land > tries to splice for the first time. > > I would like to get the sosplice pool in first. The user-land > part was missing in my previous diff. Updated diff that actually > passes make build. > > ok?
the pool change seems fine. > > bluhm > > Index: sys/kern/kern_sysctl.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v > retrieving revision 1.267 > diff -u -p -u -p -r1.267 kern_sysctl.c > --- sys/kern/kern_sysctl.c 17 Oct 2014 01:51:39 -0000 1.267 > +++ sys/kern/kern_sysctl.c 31 Oct 2014 10:23:44 -0000 > @@ -1062,11 +1062,12 @@ fill_file(struct kinfo_file *kf, struct > kf->so_family = so->so_proto->pr_domain->dom_family; > kf->so_rcv_cc = so->so_rcv.sb_cc; > kf->so_snd_cc = so->so_snd.sb_cc; > - if (so->so_splice) { > + if (isspliced(so)) { > if (show_pointers) > - kf->so_splice = PTRTOINT64(so->so_splice); > - kf->so_splicelen = so->so_splicelen; > - } else if (so->so_spliceback) > + kf->so_splice = > + PTRTOINT64(so->so_sp->ssp_socket); > + kf->so_splicelen = so->so_sp->ssp_len; > + } else if (issplicedback(so)) > kf->so_splicelen = -1; > if (!so->so_pcb) > break; > Index: sys/kern/uipc_socket.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v > retrieving revision 1.133 > diff -u -p -u -p -r1.133 uipc_socket.c > --- sys/kern/uipc_socket.c 9 Sep 2014 02:07:17 -0000 1.133 > +++ sys/kern/uipc_socket.c 31 Oct 2014 10:23:44 -0000 > @@ -80,12 +80,19 @@ int somaxconn = SOMAXCONN; > int sominconn = SOMINCONN; > > struct pool socket_pool; > +#ifdef SOCKET_SPLICE > +struct pool sosplice_pool; > +#endif > > void > soinit(void) > { > > pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL); > +#ifdef SOCKET_SPLICE > + pool_init(&sosplice_pool, sizeof(struct sosplice), 0, 0, 0, "sosppl", > + NULL); > +#endif > } > > /* > @@ -157,7 +164,7 @@ solisten(struct socket *so, int backlog) > if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) > return (EOPNOTSUPP); > #ifdef SOCKET_SPLICE > - if (so->so_splice || so->so_spliceback) > + if (isspliced(so) || 
issplicedback(so)) > return (EOPNOTSUPP); > #endif /* SOCKET_SPLICE */ > s = splsoftnet(); > @@ -199,10 +206,15 @@ sofree(struct socket *so) > return; > } > #ifdef SOCKET_SPLICE > - if (so->so_spliceback) > - sounsplice(so->so_spliceback, so, so->so_spliceback != so); > - if (so->so_splice) > - sounsplice(so, so->so_splice, 0); > + if (so->so_sp) { > + if (issplicedback(so)) > + sounsplice(so->so_sp->ssp_soback, so, > + so->so_sp->ssp_soback != so); > + if (isspliced(so)) > + sounsplice(so, so->so_sp->ssp_socket, 0); > + pool_put(&sosplice_pool, so->so_sp); > + so->so_sp = NULL; > + } > #endif /* SOCKET_SPLICE */ > sbrelease(&so->so_snd); > sorflush(so); > @@ -647,7 +659,7 @@ restart: > > m = so->so_rcv.sb_mb; > #ifdef SOCKET_SPLICE > - if (so->so_splice) > + if (isspliced(so)) > m = NULL; > #endif /* SOCKET_SPLICE */ > /* > @@ -669,7 +681,7 @@ restart: > #ifdef DIAGNOSTIC > if (m == NULL && so->so_rcv.sb_cc) > #ifdef SOCKET_SPLICE > - if (so->so_splice == NULL) > + if (!isspliced(so)) > #endif /* SOCKET_SPLICE */ > panic("receive 1"); > #endif > @@ -1021,6 +1033,12 @@ sorflush(struct socket *so) > } > > #ifdef SOCKET_SPLICE > + > +#define so_splicelen so_sp->ssp_len > +#define so_splicemax so_sp->ssp_max > +#define so_idletv so_sp->ssp_idletv > +#define so_idleto so_sp->ssp_idleto > + > int > sosplice(struct socket *so, int fd, off_t max, struct timeval *tv) > { > @@ -1035,6 +1053,8 @@ sosplice(struct socket *so, int fd, off_ > if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && > (so->so_proto->pr_flags & PR_CONNREQUIRED)) > return (ENOTCONN); > + if (so->so_sp == NULL) > + so->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO); > > /* If no fd is given, unsplice by removing existing link. */ > if (fd < 0) { > @@ -1043,8 +1063,8 @@ sosplice(struct socket *so, int fd, off_ > (so->so_state & SS_NBIO) ? 
M_NOWAIT : M_WAITOK)) != 0) > return (error); > s = splsoftnet(); > - if (so->so_splice) > - sounsplice(so, so->so_splice, 1); > + if (so->so_sp->ssp_socket) > + sounsplice(so, so->so_sp->ssp_socket, 1); > splx(s); > sbunlock(&so->so_rcv); > return (0); > @@ -1060,6 +1080,8 @@ sosplice(struct socket *so, int fd, off_ > if ((error = getsock(curproc->p_fd, fd, &fp)) != 0) > return (error); > sosp = fp->f_data; > + if (sosp->so_sp == NULL) > + sosp->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO); > > /* Lock both receive and send buffer. */ > if ((error = sblock(&so->so_rcv, > @@ -1074,7 +1096,7 @@ sosplice(struct socket *so, int fd, off_ > } > s = splsoftnet(); > > - if (so->so_splice || sosp->so_spliceback) { > + if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) { > error = EBUSY; > goto release; > } > @@ -1092,8 +1114,8 @@ sosplice(struct socket *so, int fd, off_ > } > > /* Splice so and sosp together. */ > - so->so_splice = sosp; > - sosp->so_spliceback = so; > + so->so_sp->ssp_socket = sosp; > + sosp->so_sp->ssp_soback = so; > so->so_splicelen = 0; > so->so_splicemax = max; > if (tv) > @@ -1127,7 +1149,7 @@ sounsplice(struct socket *so, struct soc > timeout_del(&so->so_idleto); > sosp->so_snd.sb_flagsintr &= ~SB_SPLICE; > so->so_rcv.sb_flagsintr &= ~SB_SPLICE; > - so->so_splice = sosp->so_spliceback = NULL; > + so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL; > if (wakeup && soreadable(so)) > sorwakeup(so); > } > @@ -1139,9 +1161,9 @@ soidle(void *arg) > int s; > > s = splsoftnet(); > - if (so->so_splice) { > + if (so->so_rcv.sb_flagsintr & SB_SPLICE) { > so->so_error = ETIMEDOUT; > - sounsplice(so, so->so_splice, 1); > + sounsplice(so, so->so_sp->ssp_socket, 1); > } > splx(s); > } > @@ -1155,7 +1177,7 @@ soidle(void *arg) > int > somove(struct socket *so, int wait) > { > - struct socket *sosp = so->so_splice; > + struct socket *sosp = so->so_sp->ssp_socket; > struct mbuf *m, **mp, *nextrecord; > u_long len, off, oobmark; > long space; > @@ 
-1408,6 +1430,12 @@ somove(struct socket *so, int wait) > timeout_add_tv(&so->so_idleto, &so->so_idletv); > return (1); > } > + > +#undef so_splicelen > +#undef so_splicemax > +#undef so_idletv > +#undef so_idleto > + > #endif /* SOCKET_SPLICE */ > > void > @@ -1416,7 +1444,7 @@ sorwakeup(struct socket *so) > #ifdef SOCKET_SPLICE > if (so->so_rcv.sb_flagsintr & SB_SPLICE) > (void) somove(so, M_DONTWAIT); > - if (so->so_splice) > + if (isspliced(so)) > return; > #endif > sowakeup(so, &so->so_rcv); > @@ -1429,7 +1457,7 @@ sowwakeup(struct socket *so) > { > #ifdef SOCKET_SPLICE > if (so->so_snd.sb_flagsintr & SB_SPLICE) > - (void) somove(so->so_spliceback, M_DONTWAIT); > + (void) somove(so->so_sp->ssp_soback, M_DONTWAIT); > #endif > sowakeup(so, &so->so_snd); > } > @@ -1722,11 +1750,12 @@ sogetopt(struct socket *so, int level, i > #ifdef SOCKET_SPLICE > case SO_SPLICE: > { > + off_t len; > int s = splsoftnet(); > > m->m_len = sizeof(off_t); > - memcpy(mtod(m, off_t *), &so->so_splicelen, > - sizeof(off_t)); > + len = so->so_sp ? 
so->so_sp->ssp_len : 0; > + memcpy(mtod(m, off_t *), &len, sizeof(off_t)); > splx(s); > break; > } > @@ -1815,7 +1844,7 @@ filt_soread(struct knote *kn, long hint) > > kn->kn_data = so->so_rcv.sb_cc; > #ifdef SOCKET_SPLICE > - if (so->so_splice) > + if (isspliced(so)) > return (0); > #endif /* SOCKET_SPLICE */ > if (so->so_state & SS_CANTRCVMORE) { > Index: sys/sys/socketvar.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v > retrieving revision 1.56 > diff -u -p -u -p -r1.56 socketvar.h > --- sys/sys/socketvar.h 9 Sep 2014 02:07:17 -0000 1.56 > +++ sys/sys/socketvar.h 31 Oct 2014 10:23:44 -0000 > @@ -81,13 +81,17 @@ struct socket { > uid_t so_siguid; /* uid of process who set so_pgid */ > uid_t so_sigeuid; /* euid of process who set so_pgid */ > u_long so_oobmark; /* chars to oob mark */ > - > - struct socket *so_splice; /* send data to drain socket */ > - struct socket *so_spliceback; /* back ref for notify and cleanup */ > - off_t so_splicelen; /* number of bytes spliced so far */ > - off_t so_splicemax; /* maximum number of bytes to splice */ > - struct timeval so_idletv; /* idle timeout */ > - struct timeout so_idleto; > +/* > + * Variables for socket splicing, allocated only when needed. > + */ > + struct sosplice { > + struct socket *ssp_socket; /* send data to drain socket */ > + struct socket *ssp_soback; /* back ref to source socket */ > + off_t ssp_len; /* number of bytes spliced */ > + off_t ssp_max; /* maximum number of bytes */ > + struct timeval ssp_idletv; /* idle timeout */ > + struct timeout ssp_idleto; > + } *so_sp; > /* > * Variables for socket buffering. > */ > @@ -148,6 +152,9 @@ struct socket { > * Macros for sockets and socket buffering. > */ > > +#define isspliced(so) ((so)->so_sp && (so)->so_sp->ssp_socket) > +#define issplicedback(so) ((so)->so_sp && (so)->so_sp->ssp_soback) > + > /* > * Do we need to notify the other side when I/O is possible? 
> */ > @@ -173,7 +180,7 @@ struct socket { > > /* can we read something from so? */ > #define soreadable(so) \ > - ((so)->so_splice == NULL && \ > + (!isspliced(so) && \ > ((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \ > ((so)->so_state & SS_CANTRCVMORE) || \ > (so)->so_qlen || (so)->so_error)) > Index: lib/libkvm/kvm_file2.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/lib/libkvm/kvm_file2.c,v > retrieving revision 1.38 > diff -u -p -u -p -r1.38 kvm_file2.c > --- lib/libkvm/kvm_file2.c 25 Oct 2014 03:18:58 -0000 1.38 > +++ lib/libkvm/kvm_file2.c 31 Oct 2014 11:40:05 -0000 > @@ -542,6 +542,7 @@ fill_file(kvm_t *kd, struct kinfo_file * > > case DTYPE_SOCKET: { > struct socket sock; > + struct sosplice ssp; > struct protosw protosw; > struct domain domain; > > @@ -565,11 +566,18 @@ fill_file(kvm_t *kd, struct kinfo_file * > kf->so_family = domain.dom_family; > kf->so_rcv_cc = sock.so_rcv.sb_cc; > kf->so_snd_cc = sock.so_snd.sb_cc; > - if (sock.so_splice) { > - kf->so_splice = PTRTOINT64(sock.so_splice); > - kf->so_splicelen = sock.so_splicelen; > - } else if (sock.so_spliceback) > - kf->so_splicelen = -1; > + if (sock.so_sp) { > + if (KREAD(kd, (u_long)sock.so_sp, &ssp)) { > + _kvm_err(kd, kd->program, "can't read splice"); > + return (-1); > + } > + if (ssp.ssp_socket) { > + kf->so_splice = PTRTOINT64(ssp.ssp_socket); > + kf->so_splicelen = ssp.ssp_len; > + } else if (ssp.ssp_soback) { > + kf->so_splicelen = -1; > + } > + } > if (!sock.so_pcb) > break; > switch (kf->so_family) { > Index: usr.bin/netstat/inet.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/usr.bin/netstat/inet.c,v > retrieving revision 1.136 > diff -u -p -u -p -r1.136 inet.c > --- usr.bin/netstat/inet.c 26 Oct 2014 14:43:03 -0000 1.136 > +++ usr.bin/netstat/inet.c 31 Oct 2014 11:00:17 -0000 > @@ -91,6 +91,7 @@ char *inetname(struct in_addr *); > void inetprint(struct 
in_addr *, in_port_t, char *, int); > char *inet6name(struct in6_addr *); > void inet6print(struct in6_addr *, int, char *); > +void sosplice_dump(u_long); > void sockbuf_dump(struct sockbuf *, const char *); > void protosw_dump(u_long, u_long); > void domain_dump(u_long, u_long, short); > @@ -1166,7 +1167,6 @@ socket_dump(u_long off) > kread(off, &so, sizeof(so)); > > #define p(fmt, v, sep) printf(#v " " fmt sep, so.v); > -#define pll(fmt, v, sep) printf(#v " " fmt sep, (long long) so.v); > #define pp(fmt, v, sep) printf(#v " " fmt sep, hideroot ? 0 : so.v); > printf("socket %#lx\n ", hideroot ? 0 : off); > p("%#.4x", so_type, "\n "); > @@ -1185,12 +1185,8 @@ socket_dump(u_long off) > p("%u", so_siguid, ", "); > p("%u", so_sigeuid, "\n "); > p("%lu", so_oobmark, "\n "); > - pp("%p", so_splice, ", "); > - pp("%p", so_spliceback, "\n "); > - p("%lld", so_splicelen, ", "); > - p("%lld", so_splicemax, ", "); > - pll("%lld", so_idletv.tv_sec, ", "); > - p("%ld", so_idletv.tv_usec, "\n "); > + if (so.so_sp) > + sosplice_dump((u_long)so.so_sp); > sockbuf_dump(&so.so_rcv, "so_rcv"); > sockbuf_dump(&so.so_snd, "so_snd"); > p("%u", so_euid, ", "); > @@ -1204,6 +1200,32 @@ socket_dump(u_long off) > if (!vflag) > return; > protosw_dump((u_long)so.so_proto, (u_long)so.so_pcb); > +} > + > +/* > + * Dump the contents of a struct sosplice > + */ > +void > +sosplice_dump(u_long off) > +{ > + struct sosplice ssp; > + > + if (off == 0) > + return; > + kread(off, &ssp, sizeof(ssp)); > + > +#define p(fmt, v, sep) printf(#v " " fmt sep, ssp.v); > +#define pll(fmt, v, sep) printf(#v " " fmt sep, (long long) ssp.v); > +#define pp(fmt, v, sep) printf(#v " " fmt sep, hideroot ? 0 : ssp.v); > + pp("%p", ssp_socket, ", "); > + pp("%p", ssp_soback, "\n "); > + p("%lld", ssp_len, ", "); > + p("%lld", ssp_max, ", "); > + pll("%lld", ssp_idletv.tv_sec, ", "); > + p("%ld", ssp_idletv.tv_usec, "\n "); > +#undef p > +#undef pll > +#undef pp > } > > /*