> On 31 Oct 2014, at 22:37, Alexander Bluhm <alexander.bl...@gmx.net> wrote:
> 
> On Fri, Oct 31, 2014 at 02:50:00PM +1000, David Gwynne wrote:
>> so without splicing, the payloads from multiple tcp packets (at least all of 
>> the ones in a single softnet run?) get bundled up into a buffer that 
>> userland reads and then writes out again in a single go. right?
>> 
>> you're suggesting the taskq as a way to defer output till after the current 
>> softnet call has processed all its packets and queued all the tcp packet 
>> payloads onto the socket?
> 
> Exactly.
> 
>> it's worth remembering there are other memory costs too. i think a kthread 
>> (the thing taskqs run on) is 5 pages on amd64, so 20KB.
> 
> We can delay the creation of the sosplice thread until user-land
> tries to splice for the first time.
> 
> I would like to get the sosplice pool in first.  The user-land
> part was missing from my previous diff.  This updated diff actually
> gets through make build.
> 
> ok?

the pool change seems fine.

> 
> bluhm
> 
> Index: sys/kern/kern_sysctl.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
> retrieving revision 1.267
> diff -u -p -u -p -r1.267 kern_sysctl.c
> --- sys/kern/kern_sysctl.c    17 Oct 2014 01:51:39 -0000      1.267
> +++ sys/kern/kern_sysctl.c    31 Oct 2014 10:23:44 -0000
> @@ -1062,11 +1062,12 @@ fill_file(struct kinfo_file *kf, struct 
>               kf->so_family = so->so_proto->pr_domain->dom_family;
>               kf->so_rcv_cc = so->so_rcv.sb_cc;
>               kf->so_snd_cc = so->so_snd.sb_cc;
> -             if (so->so_splice) {
> +             if (isspliced(so)) {
>                       if (show_pointers)
> -                             kf->so_splice = PTRTOINT64(so->so_splice);
> -                     kf->so_splicelen = so->so_splicelen;
> -             } else if (so->so_spliceback)
> +                             kf->so_splice =
> +                                 PTRTOINT64(so->so_sp->ssp_socket);
> +                     kf->so_splicelen = so->so_sp->ssp_len;
> +             } else if (issplicedback(so))
>                       kf->so_splicelen = -1;
>               if (!so->so_pcb)
>                       break;
> Index: sys/kern/uipc_socket.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.133
> diff -u -p -u -p -r1.133 uipc_socket.c
> --- sys/kern/uipc_socket.c    9 Sep 2014 02:07:17 -0000       1.133
> +++ sys/kern/uipc_socket.c    31 Oct 2014 10:23:44 -0000
> @@ -80,12 +80,19 @@ int       somaxconn = SOMAXCONN;
> int   sominconn = SOMINCONN;
> 
> struct pool socket_pool;
> +#ifdef SOCKET_SPLICE
> +struct pool sosplice_pool;
> +#endif
> 
> void
> soinit(void)
> {
> 
>       pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL);
> +#ifdef SOCKET_SPLICE
> +     pool_init(&sosplice_pool, sizeof(struct sosplice), 0, 0, 0, "sosppl",
> +         NULL);
> +#endif
> }
> 
> /*
> @@ -157,7 +164,7 @@ solisten(struct socket *so, int backlog)
>       if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
>               return (EOPNOTSUPP);
> #ifdef SOCKET_SPLICE
> -     if (so->so_splice || so->so_spliceback)
> +     if (isspliced(so) || issplicedback(so))
>               return (EOPNOTSUPP);
> #endif /* SOCKET_SPLICE */
>       s = splsoftnet();
> @@ -199,10 +206,15 @@ sofree(struct socket *so)
>                       return;
>       }
> #ifdef SOCKET_SPLICE
> -     if (so->so_spliceback)
> -             sounsplice(so->so_spliceback, so, so->so_spliceback != so);
> -     if (so->so_splice)
> -             sounsplice(so, so->so_splice, 0);
> +     if (so->so_sp) {
> +             if (issplicedback(so))
> +                     sounsplice(so->so_sp->ssp_soback, so,
> +                         so->so_sp->ssp_soback != so);
> +             if (isspliced(so))
> +                     sounsplice(so, so->so_sp->ssp_socket, 0);
> +             pool_put(&sosplice_pool, so->so_sp);
> +             so->so_sp = NULL;
> +     }
> #endif /* SOCKET_SPLICE */
>       sbrelease(&so->so_snd);
>       sorflush(so);
> @@ -647,7 +659,7 @@ restart:
> 
>       m = so->so_rcv.sb_mb;
> #ifdef SOCKET_SPLICE
> -     if (so->so_splice)
> +     if (isspliced(so))
>               m = NULL;
> #endif /* SOCKET_SPLICE */
>       /*
> @@ -669,7 +681,7 @@ restart:
> #ifdef DIAGNOSTIC
>               if (m == NULL && so->so_rcv.sb_cc)
> #ifdef SOCKET_SPLICE
> -                 if (so->so_splice == NULL)
> +                 if (!isspliced(so))
> #endif /* SOCKET_SPLICE */
>                       panic("receive 1");
> #endif
> @@ -1021,6 +1033,12 @@ sorflush(struct socket *so)
> }
> 
> #ifdef SOCKET_SPLICE
> +
> +#define so_splicelen so_sp->ssp_len
> +#define so_splicemax so_sp->ssp_max
> +#define so_idletv    so_sp->ssp_idletv
> +#define so_idleto    so_sp->ssp_idleto
> +
> int
> sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
> {
> @@ -1035,6 +1053,8 @@ sosplice(struct socket *so, int fd, off_
>       if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
>           (so->so_proto->pr_flags & PR_CONNREQUIRED))
>               return (ENOTCONN);
> +     if (so->so_sp == NULL)
> +             so->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
> 
>       /* If no fd is given, unsplice by removing existing link. */
>       if (fd < 0) {
> @@ -1043,8 +1063,8 @@ sosplice(struct socket *so, int fd, off_
>                   (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK)) != 0)
>                       return (error);
>               s = splsoftnet();
> -             if (so->so_splice)
> -                     sounsplice(so, so->so_splice, 1);
> +             if (so->so_sp->ssp_socket)
> +                     sounsplice(so, so->so_sp->ssp_socket, 1);
>               splx(s);
>               sbunlock(&so->so_rcv);
>               return (0);
> @@ -1060,6 +1080,8 @@ sosplice(struct socket *so, int fd, off_
>       if ((error = getsock(curproc->p_fd, fd, &fp)) != 0)
>               return (error);
>       sosp = fp->f_data;
> +     if (sosp->so_sp == NULL)
> +             sosp->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
> 
>       /* Lock both receive and send buffer. */
>       if ((error = sblock(&so->so_rcv,
> @@ -1074,7 +1096,7 @@ sosplice(struct socket *so, int fd, off_
>       }
>       s = splsoftnet();
> 
> -     if (so->so_splice || sosp->so_spliceback) {
> +     if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
>               error = EBUSY;
>               goto release;
>       }
> @@ -1092,8 +1114,8 @@ sosplice(struct socket *so, int fd, off_
>       }
> 
>       /* Splice so and sosp together. */
> -     so->so_splice = sosp;
> -     sosp->so_spliceback = so;
> +     so->so_sp->ssp_socket = sosp;
> +     sosp->so_sp->ssp_soback = so;
>       so->so_splicelen = 0;
>       so->so_splicemax = max;
>       if (tv)
> @@ -1127,7 +1149,7 @@ sounsplice(struct socket *so, struct soc
>       timeout_del(&so->so_idleto);
>       sosp->so_snd.sb_flagsintr &= ~SB_SPLICE;
>       so->so_rcv.sb_flagsintr &= ~SB_SPLICE;
> -     so->so_splice = sosp->so_spliceback = NULL;
> +     so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
>       if (wakeup && soreadable(so))
>               sorwakeup(so);
> }
> @@ -1139,9 +1161,9 @@ soidle(void *arg)
>       int s;
> 
>       s = splsoftnet();
> -     if (so->so_splice) {
> +     if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
>               so->so_error = ETIMEDOUT;
> -             sounsplice(so, so->so_splice, 1);
> +             sounsplice(so, so->so_sp->ssp_socket, 1);
>       }
>       splx(s);
> }
> @@ -1155,7 +1177,7 @@ soidle(void *arg)
> int
> somove(struct socket *so, int wait)
> {
> -     struct socket   *sosp = so->so_splice;
> +     struct socket   *sosp = so->so_sp->ssp_socket;
>       struct mbuf     *m, **mp, *nextrecord;
>       u_long           len, off, oobmark;
>       long             space;
> @@ -1408,6 +1430,12 @@ somove(struct socket *so, int wait)
>               timeout_add_tv(&so->so_idleto, &so->so_idletv);
>       return (1);
> }
> +
> +#undef so_splicelen
> +#undef so_splicemax
> +#undef so_idletv
> +#undef so_idleto
> +
> #endif /* SOCKET_SPLICE */
> 
> void
> @@ -1416,7 +1444,7 @@ sorwakeup(struct socket *so)
> #ifdef SOCKET_SPLICE
>       if (so->so_rcv.sb_flagsintr & SB_SPLICE)
>               (void) somove(so, M_DONTWAIT);
> -     if (so->so_splice)
> +     if (isspliced(so))
>               return;
> #endif
>       sowakeup(so, &so->so_rcv);
> @@ -1429,7 +1457,7 @@ sowwakeup(struct socket *so)
> {
> #ifdef SOCKET_SPLICE
>       if (so->so_snd.sb_flagsintr & SB_SPLICE)
> -             (void) somove(so->so_spliceback, M_DONTWAIT);
> +             (void) somove(so->so_sp->ssp_soback, M_DONTWAIT);
> #endif
>       sowakeup(so, &so->so_snd);
> }
> @@ -1722,11 +1750,12 @@ sogetopt(struct socket *so, int level, i
> #ifdef SOCKET_SPLICE
>               case SO_SPLICE:
>                   {
> +                     off_t len;
>                       int s = splsoftnet();
> 
>                       m->m_len = sizeof(off_t);
> -                     memcpy(mtod(m, off_t *), &so->so_splicelen,
> -                         sizeof(off_t));
> +                     len = so->so_sp ? so->so_sp->ssp_len : 0;
> +                     memcpy(mtod(m, off_t *), &len, sizeof(off_t));
>                       splx(s);
>                       break;
>                   }
> @@ -1815,7 +1844,7 @@ filt_soread(struct knote *kn, long hint)
> 
>       kn->kn_data = so->so_rcv.sb_cc;
> #ifdef SOCKET_SPLICE
> -     if (so->so_splice)
> +     if (isspliced(so))
>               return (0);
> #endif /* SOCKET_SPLICE */
>       if (so->so_state & SS_CANTRCVMORE) {
> Index: sys/sys/socketvar.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v
> retrieving revision 1.56
> diff -u -p -u -p -r1.56 socketvar.h
> --- sys/sys/socketvar.h       9 Sep 2014 02:07:17 -0000       1.56
> +++ sys/sys/socketvar.h       31 Oct 2014 10:23:44 -0000
> @@ -81,13 +81,17 @@ struct socket {
>       uid_t   so_siguid;              /* uid of process who set so_pgid */
>       uid_t   so_sigeuid;             /* euid of process who set so_pgid */
>       u_long  so_oobmark;             /* chars to oob mark */
> -
> -     struct  socket *so_splice;      /* send data to drain socket */
> -     struct  socket *so_spliceback;  /* back ref for notify and cleanup */
> -     off_t   so_splicelen;           /* number of bytes spliced so far */
> -     off_t   so_splicemax;           /* maximum number of bytes to splice */
> -     struct  timeval so_idletv;      /* idle timeout */
> -     struct  timeout so_idleto;
> +/*
> + * Variables for socket splicing, allocated only when needed.
> + */
> +     struct sosplice {
> +             struct  socket *ssp_socket;     /* send data to drain socket */
> +             struct  socket *ssp_soback;     /* back ref to source socket */
> +             off_t   ssp_len;                /* number of bytes spliced */
> +             off_t   ssp_max;                /* maximum number of bytes */
> +             struct  timeval ssp_idletv;     /* idle timeout */
> +             struct  timeout ssp_idleto;
> +     } *so_sp;
> /*
>  * Variables for socket buffering.
>  */
> @@ -148,6 +152,9 @@ struct socket {
>  * Macros for sockets and socket buffering.
>  */
> 
> +#define isspliced(so)                ((so)->so_sp && (so)->so_sp->ssp_socket)
> +#define issplicedback(so)    ((so)->so_sp && (so)->so_sp->ssp_soback)
> +
> /*
>  * Do we need to notify the other side when I/O is possible?
>  */
> @@ -173,7 +180,7 @@ struct socket {
> 
> /* can we read something from so? */
> #define       soreadable(so)  \
> -    ((so)->so_splice == NULL && \
> +    (!isspliced(so) && \
>     ((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \
>     ((so)->so_state & SS_CANTRCVMORE) || \
>     (so)->so_qlen || (so)->so_error))
> Index: lib/libkvm/kvm_file2.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/lib/libkvm/kvm_file2.c,v
> retrieving revision 1.38
> diff -u -p -u -p -r1.38 kvm_file2.c
> --- lib/libkvm/kvm_file2.c    25 Oct 2014 03:18:58 -0000      1.38
> +++ lib/libkvm/kvm_file2.c    31 Oct 2014 11:40:05 -0000
> @@ -542,6 +542,7 @@ fill_file(kvm_t *kd, struct kinfo_file *
> 
>       case DTYPE_SOCKET: {
>               struct socket sock;
> +             struct sosplice ssp;
>               struct protosw protosw;
>               struct domain domain;
> 
> @@ -565,11 +566,18 @@ fill_file(kvm_t *kd, struct kinfo_file *
>               kf->so_family = domain.dom_family;
>               kf->so_rcv_cc = sock.so_rcv.sb_cc;
>               kf->so_snd_cc = sock.so_snd.sb_cc;
> -             if (sock.so_splice) {
> -                     kf->so_splice = PTRTOINT64(sock.so_splice);
> -                     kf->so_splicelen = sock.so_splicelen;
> -             } else if (sock.so_spliceback)
> -                     kf->so_splicelen = -1;
> +             if (sock.so_sp) {
> +                     if (KREAD(kd, (u_long)sock.so_sp, &ssp)) {
> +                             _kvm_err(kd, kd->program, "can't read splice");
> +                             return (-1);
> +                     }
> +                     if (ssp.ssp_socket) {
> +                             kf->so_splice = PTRTOINT64(ssp.ssp_socket);
> +                             kf->so_splicelen = ssp.ssp_len;
> +                     } else if (ssp.ssp_soback) {
> +                             kf->so_splicelen = -1;
> +                     }
> +             }
>               if (!sock.so_pcb)
>                       break;
>               switch (kf->so_family) {
> Index: usr.bin/netstat/inet.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/usr.bin/netstat/inet.c,v
> retrieving revision 1.136
> diff -u -p -u -p -r1.136 inet.c
> --- usr.bin/netstat/inet.c    26 Oct 2014 14:43:03 -0000      1.136
> +++ usr.bin/netstat/inet.c    31 Oct 2014 11:00:17 -0000
> @@ -91,6 +91,7 @@ char        *inetname(struct in_addr *);
> void  inetprint(struct in_addr *, in_port_t, char *, int);
> char  *inet6name(struct in6_addr *);
> void  inet6print(struct in6_addr *, int, char *);
> +void sosplice_dump(u_long);
> void  sockbuf_dump(struct sockbuf *, const char *);
> void  protosw_dump(u_long, u_long);
> void  domain_dump(u_long, u_long, short);
> @@ -1166,7 +1167,6 @@ socket_dump(u_long off)
>       kread(off, &so, sizeof(so));
> 
> #define       p(fmt, v, sep) printf(#v " " fmt sep, so.v);
> -#define      pll(fmt, v, sep) printf(#v " " fmt sep, (long long) so.v);
> #define       pp(fmt, v, sep) printf(#v " " fmt sep, hideroot ? 0 : so.v);
>       printf("socket %#lx\n ", hideroot ? 0 : off);
>       p("%#.4x", so_type, "\n ");
> @@ -1185,12 +1185,8 @@ socket_dump(u_long off)
>       p("%u", so_siguid, ", ");
>       p("%u", so_sigeuid, "\n ");
>       p("%lu", so_oobmark, "\n ");
> -     pp("%p", so_splice, ", ");
> -     pp("%p", so_spliceback, "\n ");
> -     p("%lld", so_splicelen, ", ");
> -     p("%lld", so_splicemax, ", ");
> -     pll("%lld", so_idletv.tv_sec, ", ");
> -     p("%ld", so_idletv.tv_usec, "\n ");
> +     if (so.so_sp)
> +             sosplice_dump((u_long)so.so_sp);
>       sockbuf_dump(&so.so_rcv, "so_rcv");
>       sockbuf_dump(&so.so_snd, "so_snd");
>       p("%u", so_euid, ", ");
> @@ -1204,6 +1200,32 @@ socket_dump(u_long off)
>       if (!vflag)
>               return;
>       protosw_dump((u_long)so.so_proto, (u_long)so.so_pcb);
> +}
> +
> +/*
> + * Dump the contents of a struct sosplice
> + */
> +void
> +sosplice_dump(u_long off)
> +{
> +     struct sosplice ssp;
> +
> +     if (off == 0)
> +             return;
> +     kread(off, &ssp, sizeof(ssp));
> +
> +#define      p(fmt, v, sep) printf(#v " " fmt sep, ssp.v);
> +#define      pll(fmt, v, sep) printf(#v " " fmt sep, (long long) ssp.v);
> +#define      pp(fmt, v, sep) printf(#v " " fmt sep, hideroot ? 0 : ssp.v);
> +     pp("%p", ssp_socket, ", ");
> +     pp("%p", ssp_soback, "\n ");
> +     p("%lld", ssp_len, ", ");
> +     p("%lld", ssp_max, ", ");
> +     pll("%lld", ssp_idletv.tv_sec, ", ");
> +     p("%ld", ssp_idletv.tv_usec, "\n ");
> +#undef       p
> +#undef       pll
> +#undef       pp
> }
> 
> /*


Reply via email to