On Fri, Oct 31, 2014 at 02:50:00PM +1000, David Gwynne wrote:
> so without splicing, the payloads from multiple tcp packets (at least all of 
> the ones in a single softnet run?) get bundled up into a buffer that userland 
> reads and then writes out again in a single go. right?
> 
> you're suggesting the taskq as a way to defer output till after the current 
> softnet call has processed all its packets and queued all the tcp packet 
> payloads onto the socket?

Exactly.

> its worth remembering there are other memory costs too. i think a kthread 
> (the thing taskqs run on) is 5 pages amd64, so 20KB.

We can delay the creation of the sosplice thread until user-land
tries to splice for the first time.

I would like to get the sosplice pool in first.  The user-land
part was missing in my previous diff.  Here is an updated diff that
can actually do a make build.

ok?

bluhm

Index: sys/kern/kern_sysctl.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.267
diff -u -p -u -p -r1.267 kern_sysctl.c
--- sys/kern/kern_sysctl.c      17 Oct 2014 01:51:39 -0000      1.267
+++ sys/kern/kern_sysctl.c      31 Oct 2014 10:23:44 -0000
@@ -1062,11 +1062,12 @@ fill_file(struct kinfo_file *kf, struct 
                kf->so_family = so->so_proto->pr_domain->dom_family;
                kf->so_rcv_cc = so->so_rcv.sb_cc;
                kf->so_snd_cc = so->so_snd.sb_cc;
-               if (so->so_splice) {
+               if (isspliced(so)) {
                        if (show_pointers)
-                               kf->so_splice = PTRTOINT64(so->so_splice);
-                       kf->so_splicelen = so->so_splicelen;
-               } else if (so->so_spliceback)
+                               kf->so_splice =
+                                   PTRTOINT64(so->so_sp->ssp_socket);
+                       kf->so_splicelen = so->so_sp->ssp_len;
+               } else if (issplicedback(so))
                        kf->so_splicelen = -1;
                if (!so->so_pcb)
                        break;
Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.133
diff -u -p -u -p -r1.133 uipc_socket.c
--- sys/kern/uipc_socket.c      9 Sep 2014 02:07:17 -0000       1.133
+++ sys/kern/uipc_socket.c      31 Oct 2014 10:23:44 -0000
@@ -80,12 +80,19 @@ int somaxconn = SOMAXCONN;
 int    sominconn = SOMINCONN;
 
 struct pool socket_pool;
+#ifdef SOCKET_SPLICE
+struct pool sosplice_pool;
+#endif
 
 void
 soinit(void)
 {
 
        pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL);
+#ifdef SOCKET_SPLICE
+       pool_init(&sosplice_pool, sizeof(struct sosplice), 0, 0, 0, "sosppl",
+           NULL);
+#endif
 }
 
 /*
@@ -157,7 +164,7 @@ solisten(struct socket *so, int backlog)
        if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
                return (EOPNOTSUPP);
 #ifdef SOCKET_SPLICE
-       if (so->so_splice || so->so_spliceback)
+       if (isspliced(so) || issplicedback(so))
                return (EOPNOTSUPP);
 #endif /* SOCKET_SPLICE */
        s = splsoftnet();
@@ -199,10 +206,15 @@ sofree(struct socket *so)
                        return;
        }
 #ifdef SOCKET_SPLICE
-       if (so->so_spliceback)
-               sounsplice(so->so_spliceback, so, so->so_spliceback != so);
-       if (so->so_splice)
-               sounsplice(so, so->so_splice, 0);
+       if (so->so_sp) {
+               if (issplicedback(so))
+                       sounsplice(so->so_sp->ssp_soback, so,
+                           so->so_sp->ssp_soback != so);
+               if (isspliced(so))
+                       sounsplice(so, so->so_sp->ssp_socket, 0);
+               pool_put(&sosplice_pool, so->so_sp);
+               so->so_sp = NULL;
+       }
 #endif /* SOCKET_SPLICE */
        sbrelease(&so->so_snd);
        sorflush(so);
@@ -647,7 +659,7 @@ restart:
 
        m = so->so_rcv.sb_mb;
 #ifdef SOCKET_SPLICE
-       if (so->so_splice)
+       if (isspliced(so))
                m = NULL;
 #endif /* SOCKET_SPLICE */
        /*
@@ -669,7 +681,7 @@ restart:
 #ifdef DIAGNOSTIC
                if (m == NULL && so->so_rcv.sb_cc)
 #ifdef SOCKET_SPLICE
-                   if (so->so_splice == NULL)
+                   if (!isspliced(so))
 #endif /* SOCKET_SPLICE */
                        panic("receive 1");
 #endif
@@ -1021,6 +1033,12 @@ sorflush(struct socket *so)
 }
 
 #ifdef SOCKET_SPLICE
+
+#define so_splicelen   so_sp->ssp_len
+#define so_splicemax   so_sp->ssp_max
+#define so_idletv      so_sp->ssp_idletv
+#define so_idleto      so_sp->ssp_idleto
+
 int
 sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
 {
@@ -1035,6 +1053,8 @@ sosplice(struct socket *so, int fd, off_
        if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
            (so->so_proto->pr_flags & PR_CONNREQUIRED))
                return (ENOTCONN);
+       if (so->so_sp == NULL)
+               so->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
 
        /* If no fd is given, unsplice by removing existing link. */
        if (fd < 0) {
@@ -1043,8 +1063,8 @@ sosplice(struct socket *so, int fd, off_
                    (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK)) != 0)
                        return (error);
                s = splsoftnet();
-               if (so->so_splice)
-                       sounsplice(so, so->so_splice, 1);
+               if (so->so_sp->ssp_socket)
+                       sounsplice(so, so->so_sp->ssp_socket, 1);
                splx(s);
                sbunlock(&so->so_rcv);
                return (0);
@@ -1060,6 +1080,8 @@ sosplice(struct socket *so, int fd, off_
        if ((error = getsock(curproc->p_fd, fd, &fp)) != 0)
                return (error);
        sosp = fp->f_data;
+       if (sosp->so_sp == NULL)
+               sosp->so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
 
        /* Lock both receive and send buffer. */
        if ((error = sblock(&so->so_rcv,
@@ -1074,7 +1096,7 @@ sosplice(struct socket *so, int fd, off_
        }
        s = splsoftnet();
 
-       if (so->so_splice || sosp->so_spliceback) {
+       if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
                error = EBUSY;
                goto release;
        }
@@ -1092,8 +1114,8 @@ sosplice(struct socket *so, int fd, off_
        }
 
        /* Splice so and sosp together. */
-       so->so_splice = sosp;
-       sosp->so_spliceback = so;
+       so->so_sp->ssp_socket = sosp;
+       sosp->so_sp->ssp_soback = so;
        so->so_splicelen = 0;
        so->so_splicemax = max;
        if (tv)
@@ -1127,7 +1149,7 @@ sounsplice(struct socket *so, struct soc
        timeout_del(&so->so_idleto);
        sosp->so_snd.sb_flagsintr &= ~SB_SPLICE;
        so->so_rcv.sb_flagsintr &= ~SB_SPLICE;
-       so->so_splice = sosp->so_spliceback = NULL;
+       so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
        if (wakeup && soreadable(so))
                sorwakeup(so);
 }
@@ -1139,9 +1161,9 @@ soidle(void *arg)
        int s;
 
        s = splsoftnet();
-       if (so->so_splice) {
+       if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
                so->so_error = ETIMEDOUT;
-               sounsplice(so, so->so_splice, 1);
+               sounsplice(so, so->so_sp->ssp_socket, 1);
        }
        splx(s);
 }
@@ -1155,7 +1177,7 @@ soidle(void *arg)
 int
 somove(struct socket *so, int wait)
 {
-       struct socket   *sosp = so->so_splice;
+       struct socket   *sosp = so->so_sp->ssp_socket;
        struct mbuf     *m, **mp, *nextrecord;
        u_long           len, off, oobmark;
        long             space;
@@ -1408,6 +1430,12 @@ somove(struct socket *so, int wait)
                timeout_add_tv(&so->so_idleto, &so->so_idletv);
        return (1);
 }
+
+#undef so_splicelen
+#undef so_splicemax
+#undef so_idletv
+#undef so_idleto
+
 #endif /* SOCKET_SPLICE */
 
 void
@@ -1416,7 +1444,7 @@ sorwakeup(struct socket *so)
 #ifdef SOCKET_SPLICE
        if (so->so_rcv.sb_flagsintr & SB_SPLICE)
                (void) somove(so, M_DONTWAIT);
-       if (so->so_splice)
+       if (isspliced(so))
                return;
 #endif
        sowakeup(so, &so->so_rcv);
@@ -1429,7 +1457,7 @@ sowwakeup(struct socket *so)
 {
 #ifdef SOCKET_SPLICE
        if (so->so_snd.sb_flagsintr & SB_SPLICE)
-               (void) somove(so->so_spliceback, M_DONTWAIT);
+               (void) somove(so->so_sp->ssp_soback, M_DONTWAIT);
 #endif
        sowakeup(so, &so->so_snd);
 }
@@ -1722,11 +1750,12 @@ sogetopt(struct socket *so, int level, i
 #ifdef SOCKET_SPLICE
                case SO_SPLICE:
                    {
+                       off_t len;
                        int s = splsoftnet();
 
                        m->m_len = sizeof(off_t);
-                       memcpy(mtod(m, off_t *), &so->so_splicelen,
-                           sizeof(off_t));
+                       len = so->so_sp ? so->so_sp->ssp_len : 0;
+                       memcpy(mtod(m, off_t *), &len, sizeof(off_t));
                        splx(s);
                        break;
                    }
@@ -1815,7 +1844,7 @@ filt_soread(struct knote *kn, long hint)
 
        kn->kn_data = so->so_rcv.sb_cc;
 #ifdef SOCKET_SPLICE
-       if (so->so_splice)
+       if (isspliced(so))
                return (0);
 #endif /* SOCKET_SPLICE */
        if (so->so_state & SS_CANTRCVMORE) {
Index: sys/sys/socketvar.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.56
diff -u -p -u -p -r1.56 socketvar.h
--- sys/sys/socketvar.h 9 Sep 2014 02:07:17 -0000       1.56
+++ sys/sys/socketvar.h 31 Oct 2014 10:23:44 -0000
@@ -81,13 +81,17 @@ struct socket {
        uid_t   so_siguid;              /* uid of process who set so_pgid */
        uid_t   so_sigeuid;             /* euid of process who set so_pgid */
        u_long  so_oobmark;             /* chars to oob mark */
-
-       struct  socket *so_splice;      /* send data to drain socket */
-       struct  socket *so_spliceback;  /* back ref for notify and cleanup */
-       off_t   so_splicelen;           /* number of bytes spliced so far */
-       off_t   so_splicemax;           /* maximum number of bytes to splice */
-       struct  timeval so_idletv;      /* idle timeout */
-       struct  timeout so_idleto;
+/*
+ * Variables for socket splicing, allocated only when needed.
+ */
+       struct sosplice {
+               struct  socket *ssp_socket;     /* send data to drain socket */
+               struct  socket *ssp_soback;     /* back ref to source socket */
+               off_t   ssp_len;                /* number of bytes spliced */
+               off_t   ssp_max;                /* maximum number of bytes */
+               struct  timeval ssp_idletv;     /* idle timeout */
+               struct  timeout ssp_idleto;
+       } *so_sp;
 /*
  * Variables for socket buffering.
  */
@@ -148,6 +152,9 @@ struct socket {
  * Macros for sockets and socket buffering.
  */
 
+#define isspliced(so)          ((so)->so_sp && (so)->so_sp->ssp_socket)
+#define issplicedback(so)      ((so)->so_sp && (so)->so_sp->ssp_soback)
+
 /*
  * Do we need to notify the other side when I/O is possible?
  */
@@ -173,7 +180,7 @@ struct socket {
 
 /* can we read something from so? */
 #define        soreadable(so)  \
-    ((so)->so_splice == NULL && \
+    (!isspliced(so) && \
     ((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \
     ((so)->so_state & SS_CANTRCVMORE) || \
     (so)->so_qlen || (so)->so_error))
Index: lib/libkvm/kvm_file2.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/lib/libkvm/kvm_file2.c,v
retrieving revision 1.38
diff -u -p -u -p -r1.38 kvm_file2.c
--- lib/libkvm/kvm_file2.c      25 Oct 2014 03:18:58 -0000      1.38
+++ lib/libkvm/kvm_file2.c      31 Oct 2014 11:40:05 -0000
@@ -542,6 +542,7 @@ fill_file(kvm_t *kd, struct kinfo_file *
 
        case DTYPE_SOCKET: {
                struct socket sock;
+               struct sosplice ssp;
                struct protosw protosw;
                struct domain domain;
 
@@ -565,11 +566,18 @@ fill_file(kvm_t *kd, struct kinfo_file *
                kf->so_family = domain.dom_family;
                kf->so_rcv_cc = sock.so_rcv.sb_cc;
                kf->so_snd_cc = sock.so_snd.sb_cc;
-               if (sock.so_splice) {
-                       kf->so_splice = PTRTOINT64(sock.so_splice);
-                       kf->so_splicelen = sock.so_splicelen;
-               } else if (sock.so_spliceback)
-                       kf->so_splicelen = -1;
+               if (sock.so_sp) {
+                       if (KREAD(kd, (u_long)sock.so_sp, &ssp)) {
+                               _kvm_err(kd, kd->program, "can't read splice");
+                               return (-1);
+                       }
+                       if (ssp.ssp_socket) {
+                               kf->so_splice = PTRTOINT64(ssp.ssp_socket);
+                               kf->so_splicelen = ssp.ssp_len;
+                       } else if (ssp.ssp_soback) {
+                               kf->so_splicelen = -1;
+                       }
+               }
                if (!sock.so_pcb)
                        break;
                switch (kf->so_family) {
Index: usr.bin/netstat/inet.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/usr.bin/netstat/inet.c,v
retrieving revision 1.136
diff -u -p -u -p -r1.136 inet.c
--- usr.bin/netstat/inet.c      26 Oct 2014 14:43:03 -0000      1.136
+++ usr.bin/netstat/inet.c      31 Oct 2014 11:00:17 -0000
@@ -91,6 +91,7 @@ char  *inetname(struct in_addr *);
 void   inetprint(struct in_addr *, in_port_t, char *, int);
 char   *inet6name(struct in6_addr *);
 void   inet6print(struct in6_addr *, int, char *);
+void   sosplice_dump(u_long);
 void   sockbuf_dump(struct sockbuf *, const char *);
 void   protosw_dump(u_long, u_long);
 void   domain_dump(u_long, u_long, short);
@@ -1166,7 +1167,6 @@ socket_dump(u_long off)
        kread(off, &so, sizeof(so));
 
 #define        p(fmt, v, sep) printf(#v " " fmt sep, so.v);
-#define        pll(fmt, v, sep) printf(#v " " fmt sep, (long long) so.v);
 #define        pp(fmt, v, sep) printf(#v " " fmt sep, hideroot ? 0 : so.v);
        printf("socket %#lx\n ", hideroot ? 0 : off);
        p("%#.4x", so_type, "\n ");
@@ -1185,12 +1185,8 @@ socket_dump(u_long off)
        p("%u", so_siguid, ", ");
        p("%u", so_sigeuid, "\n ");
        p("%lu", so_oobmark, "\n ");
-       pp("%p", so_splice, ", ");
-       pp("%p", so_spliceback, "\n ");
-       p("%lld", so_splicelen, ", ");
-       p("%lld", so_splicemax, ", ");
-       pll("%lld", so_idletv.tv_sec, ", ");
-       p("%ld", so_idletv.tv_usec, "\n ");
+       if (so.so_sp)
+               sosplice_dump((u_long)so.so_sp);
        sockbuf_dump(&so.so_rcv, "so_rcv");
        sockbuf_dump(&so.so_snd, "so_snd");
        p("%u", so_euid, ", ");
@@ -1204,6 +1200,32 @@ socket_dump(u_long off)
        if (!vflag)
                return;
        protosw_dump((u_long)so.so_proto, (u_long)so.so_pcb);
+}
+
+/*
+ * Dump the contents of a struct sosplice
+ */
+void
+sosplice_dump(u_long off)
+{
+       struct sosplice ssp;
+
+       if (off == 0)
+               return;
+       kread(off, &ssp, sizeof(ssp));
+
+#define        p(fmt, v, sep) printf(#v " " fmt sep, ssp.v);
+#define        pll(fmt, v, sep) printf(#v " " fmt sep, (long long) ssp.v);
+#define        pp(fmt, v, sep) printf(#v " " fmt sep, hideroot ? 0 : ssp.v);
+       pp("%p", ssp_socket, ", ");
+       pp("%p", ssp_soback, "\n ");
+       p("%lld", ssp_len, ", ");
+       p("%lld", ssp_max, ", ");
+       pll("%lld", ssp_idletv.tv_sec, ", ");
+       p("%ld", ssp_idletv.tv_usec, "\n ");
+#undef p
+#undef pll
+#undef pp
 }
 
 /*

Reply via email to