Re: socket splicing without kernel lock

2019-07-04 Thread Claudio Jeker
On Thu, Jul 04, 2019 at 01:16:53PM +0200, Alexander Bluhm wrote:
> On Thu, Jul 04, 2019 at 10:47:22AM +0200, Claudio Jeker wrote:
> > Would it be possible to use some #defined flags here instead of 1,2,3?
> > Maybe use FREAD/FWRITE or define something new.
> 
> Makes code longer, but more readable.
> 
> ok?

Looks good to me. OK claudio@
 
> bluhm
> 
> Index: sys/kern/uipc_socket.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.231
> diff -u -p -r1.231 uipc_socket.c
> --- sys/kern/uipc_socket.c17 Dec 2018 16:46:59 -  1.231
> +++ sys/kern/uipc_socket.c4 Jul 2019 11:07:40 -
> @@ -195,6 +195,8 @@ solisten(struct socket *so, int backlog)
>   return (0);
>  }
> 
> +#define SOSP_FREEING_READ1
> +#define SOSP_FREEING_WRITE   2
>  void
>  sofree(struct socket *so, int s)
>  {
> @@ -218,11 +220,20 @@ sofree(struct socket *so, int s)
>   sigio_free(>so_sigio);
>  #ifdef SOCKET_SPLICE
>   if (so->so_sp) {
> - if (issplicedback(so))
> - sounsplice(so->so_sp->ssp_soback, so,
> - so->so_sp->ssp_soback != so);
> - if (isspliced(so))
> - sounsplice(so, so->so_sp->ssp_socket, 0);
> + if (issplicedback(so)) {
> + int freeing = SOSP_FREEING_WRITE;
> +
> + if (so->so_sp->ssp_soback == so)
> + freeing |= SOSP_FREEING_READ;
> + sounsplice(so->so_sp->ssp_soback, so, freeing);
> + }
> + if (isspliced(so)) {
> + int freeing = SOSP_FREEING_READ;
> +
> + if (so == so->so_sp->ssp_socket)
> + freeing |= SOSP_FREEING_WRITE;
> + sounsplice(so, so->so_sp->ssp_socket, freeing);
> + }
>   }
>  #endif /* SOCKET_SPLICE */
>   sbrelease(so, >so_snd);
> @@ -1148,7 +1159,7 @@ sosplice(struct socket *so, int fd, off_
>   return (error);
>   }
>   if (so->so_sp->ssp_socket)
> - sounsplice(so, so->so_sp->ssp_socket, 1);
> + sounsplice(so, so->so_sp->ssp_socket, 0);
>   sbunlock(so, >so_rcv);
>   return (0);
>   }
> @@ -1227,7 +1238,7 @@ sosplice(struct socket *so, int fd, off_
>  }
> 
>  void
> -sounsplice(struct socket *so, struct socket *sosp, int wakeup)
> +sounsplice(struct socket *so, struct socket *sosp, int freeing)
>  {
>   soassertlocked(so);
> 
> @@ -1236,8 +1247,11 @@ sounsplice(struct socket *so, struct soc
>   sosp->so_snd.sb_flags &= ~SB_SPLICE;
>   so->so_rcv.sb_flags &= ~SB_SPLICE;
>   so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
> - if (wakeup && soreadable(so))
> + /* Do not wakeup a socket that is about to be freed. */
> + if ((freeing & SOSP_FREEING_READ) == 0 && soreadable(so))
>   sorwakeup(so);
> + if ((freeing & SOSP_FREEING_WRITE) == 0 && sowriteable(sosp))
> + sowwakeup(sosp);
>  }
> 
>  void
> @@ -1249,7 +1263,7 @@ soidle(void *arg)
>   s = solock(so);
>   if (so->so_rcv.sb_flags & SB_SPLICE) {
>   so->so_error = ETIMEDOUT;
> - sounsplice(so, so->so_sp->ssp_socket, 1);
> + sounsplice(so, so->so_sp->ssp_socket, 0);
>   }
>   sounlock(so, s);
>  }
> @@ -1574,7 +1588,7 @@ somove(struct socket *so, int wait)
>   so->so_error = error;
>   if (((so->so_state & SS_CANTRCVMORE) && so->so_rcv.sb_cc == 0) ||
>   (sosp->so_state & SS_CANTSENDMORE) || maxreached || error) {
> - sounsplice(so, sosp, 1);
> + sounsplice(so, sosp, 0);
>   return (0);
>   }
>   if (timerisset(>so_idletv))
> @@ -1620,6 +1634,8 @@ sowwakeup(struct socket *so)
>  #ifdef SOCKET_SPLICE
>   if (so->so_snd.sb_flags & SB_SPLICE)
>   task_add(sosplice_taskq, >so_sp->ssp_soback->so_splicetask);
> + if (issplicedback(so))
> + return;
>  #endif
>   sowakeup(so, >so_snd);
>  }
> Index: share/man/man9/sosplice.9
> ===
> RCS file: /data/mirror/openbsd/cvs/src/share/man/man9/sosplice.9,v
> retrieving revision 1.9
> diff -u -p -r1.9 sosplice.9
> --- share/man/man9/sosplice.9 15 Aug 2018 12:10:49 -  1.9
> +++ share/man/man9/sosplice.9 3 Jul 2019 21:42:34 -
> @@ -206,10 +206,13 @@ will call
>  to trigger the transfer when new data or buffer space is available.
>  While socket splicing is active, any
>  .Xr read 2
> -from the source socket will block and the wakeup will not be delivered
> -to the file descriptor.
> -A read event or a socket error is signaled to userland after
> -dissolving.
> +from the source socket will block.
> +Neither read nor write wakeups will be delivered to the file
> +descriptors.
> 

Re: socket splicing without kernel lock

2019-07-04 Thread Alexander Bluhm
On Thu, Jul 04, 2019 at 10:47:22AM +0200, Claudio Jeker wrote:
> Would it be possible to use some #defined flags here instead of 1,2,3?
> Maybe use FREAD/FWRITE or define something new.

Makes code longer, but more readable.

ok?

bluhm

Index: sys/kern/uipc_socket.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.231
diff -u -p -r1.231 uipc_socket.c
--- sys/kern/uipc_socket.c  17 Dec 2018 16:46:59 -  1.231
+++ sys/kern/uipc_socket.c  4 Jul 2019 11:07:40 -
@@ -195,6 +195,8 @@ solisten(struct socket *so, int backlog)
return (0);
 }

+#define SOSP_FREEING_READ  1
+#define SOSP_FREEING_WRITE 2
 void
 sofree(struct socket *so, int s)
 {
@@ -218,11 +220,20 @@ sofree(struct socket *so, int s)
sigio_free(>so_sigio);
 #ifdef SOCKET_SPLICE
if (so->so_sp) {
-   if (issplicedback(so))
-   sounsplice(so->so_sp->ssp_soback, so,
-   so->so_sp->ssp_soback != so);
-   if (isspliced(so))
-   sounsplice(so, so->so_sp->ssp_socket, 0);
+   if (issplicedback(so)) {
+   int freeing = SOSP_FREEING_WRITE;
+
+   if (so->so_sp->ssp_soback == so)
+   freeing |= SOSP_FREEING_READ;
+   sounsplice(so->so_sp->ssp_soback, so, freeing);
+   }
+   if (isspliced(so)) {
+   int freeing = SOSP_FREEING_READ;
+
+   if (so == so->so_sp->ssp_socket)
+   freeing |= SOSP_FREEING_WRITE;
+   sounsplice(so, so->so_sp->ssp_socket, freeing);
+   }
}
 #endif /* SOCKET_SPLICE */
sbrelease(so, >so_snd);
@@ -1148,7 +1159,7 @@ sosplice(struct socket *so, int fd, off_
return (error);
}
if (so->so_sp->ssp_socket)
-   sounsplice(so, so->so_sp->ssp_socket, 1);
+   sounsplice(so, so->so_sp->ssp_socket, 0);
sbunlock(so, >so_rcv);
return (0);
}
@@ -1227,7 +1238,7 @@ sosplice(struct socket *so, int fd, off_
 }

 void
-sounsplice(struct socket *so, struct socket *sosp, int wakeup)
+sounsplice(struct socket *so, struct socket *sosp, int freeing)
 {
soassertlocked(so);

@@ -1236,8 +1247,11 @@ sounsplice(struct socket *so, struct soc
sosp->so_snd.sb_flags &= ~SB_SPLICE;
so->so_rcv.sb_flags &= ~SB_SPLICE;
so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
-   if (wakeup && soreadable(so))
+   /* Do not wakeup a socket that is about to be freed. */
+   if ((freeing & SOSP_FREEING_READ) == 0 && soreadable(so))
sorwakeup(so);
+   if ((freeing & SOSP_FREEING_WRITE) == 0 && sowriteable(sosp))
+   sowwakeup(sosp);
 }

 void
@@ -1249,7 +1263,7 @@ soidle(void *arg)
s = solock(so);
if (so->so_rcv.sb_flags & SB_SPLICE) {
so->so_error = ETIMEDOUT;
-   sounsplice(so, so->so_sp->ssp_socket, 1);
+   sounsplice(so, so->so_sp->ssp_socket, 0);
}
sounlock(so, s);
 }
@@ -1574,7 +1588,7 @@ somove(struct socket *so, int wait)
so->so_error = error;
if (((so->so_state & SS_CANTRCVMORE) && so->so_rcv.sb_cc == 0) ||
(sosp->so_state & SS_CANTSENDMORE) || maxreached || error) {
-   sounsplice(so, sosp, 1);
+   sounsplice(so, sosp, 0);
return (0);
}
if (timerisset(>so_idletv))
@@ -1620,6 +1634,8 @@ sowwakeup(struct socket *so)
 #ifdef SOCKET_SPLICE
if (so->so_snd.sb_flags & SB_SPLICE)
task_add(sosplice_taskq, >so_sp->ssp_soback->so_splicetask);
+   if (issplicedback(so))
+   return;
 #endif
sowakeup(so, >so_snd);
 }
Index: share/man/man9/sosplice.9
===
RCS file: /data/mirror/openbsd/cvs/src/share/man/man9/sosplice.9,v
retrieving revision 1.9
diff -u -p -r1.9 sosplice.9
--- share/man/man9/sosplice.9   15 Aug 2018 12:10:49 -  1.9
+++ share/man/man9/sosplice.9   3 Jul 2019 21:42:34 -
@@ -206,10 +206,13 @@ will call
 to trigger the transfer when new data or buffer space is available.
 While socket splicing is active, any
 .Xr read 2
-from the source socket will block and the wakeup will not be delivered
-to the file descriptor.
-A read event or a socket error is signaled to userland after
-dissolving.
+from the source socket will block.
+Neither read nor write wakeups will be delivered to the file
+descriptors.
+After dissolving, a read event or a socket error is signaled to
+userland on the source socket.
+If space is available, a write event will be signaled on the drain
+socket.
 .Sh RETURN VALUES
 .Fn sosplice
 returns 0 on 

Re: socket splicing without kernel lock

2019-07-04 Thread Claudio Jeker
On Wed, Jul 03, 2019 at 11:49:51PM +0200, Alexander Bluhm wrote:
> Hi,
> 
> I would like to remove a useless kernel lock during socket splicing.
> 
> We have a socket "so" that splices data to socket "sosp".  Everytime
> when space in sosp gets available, we add a task to move data from
> so to sosp.  Additionally we call sowakeup() from sowwakeup().  I
> have added this as it is possible to splice from so to sosp and
> write from userland to sosp simultaneously.  In practice this does
> not make sense as the data streams from two sources would get mixed.
> Nothing uses this.  So it is not neccessay to inform userland that
> it is possible to write.
> 
> Note that sowakeup() takes the kernel lock.  So when I do not call
> it for a spliced socket, there is no kernel lock in the TCP splicing
> path anymore.
> 
> But then I have to wakeup userland for the wirting socket in
> sounsplice().
> 
> ok?

I think in general this makes sense. One comment inline.


> Index: sys/kern/uipc_socket.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.231
> diff -u -p -r1.231 uipc_socket.c
> --- sys/kern/uipc_socket.c17 Dec 2018 16:46:59 -  1.231
> +++ sys/kern/uipc_socket.c3 Jul 2019 13:56:09 -
> @@ -220,9 +220,10 @@ sofree(struct socket *so, int s)
>   if (so->so_sp) {
>   if (issplicedback(so))
>   sounsplice(so->so_sp->ssp_soback, so,
> - so->so_sp->ssp_soback != so);
> + so->so_sp->ssp_soback == so ? 3 : 2);
>   if (isspliced(so))
> - sounsplice(so, so->so_sp->ssp_socket, 0);
> + sounsplice(so, so->so_sp->ssp_socket,
> + so == so->so_sp->ssp_socket ? 3 : 1);
>   }
>  #endif /* SOCKET_SPLICE */
>   sbrelease(so, >so_snd);
> @@ -1148,7 +1149,7 @@ sosplice(struct socket *so, int fd, off_
>   return (error);
>   }
>   if (so->so_sp->ssp_socket)
> - sounsplice(so, so->so_sp->ssp_socket, 1);
> + sounsplice(so, so->so_sp->ssp_socket, 0);
>   sbunlock(so, >so_rcv);
>   return (0);
>   }
> @@ -1227,7 +1228,7 @@ sosplice(struct socket *so, int fd, off_
>  }
> 
>  void
> -sounsplice(struct socket *so, struct socket *sosp, int wakeup)
> +sounsplice(struct socket *so, struct socket *sosp, int freeing)
>  {
>   soassertlocked(so);
> 
> @@ -1236,8 +1237,11 @@ sounsplice(struct socket *so, struct soc
>   sosp->so_snd.sb_flags &= ~SB_SPLICE;
>   so->so_rcv.sb_flags &= ~SB_SPLICE;
>   so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
> - if (wakeup && soreadable(so))
> + /* Do not wakeup a socket that is about to be freed. */
> + if ((freeing & 1) == 0 && soreadable(so))
>   sorwakeup(so);
> + if ((freeing & 2) == 0 && sowriteable(sosp))
> + sowwakeup(sosp);

Would it be possible to use some #defined flags here instead of 1,2,3?
Maybe use FREAD/FWRITE or define something new.

>  }
> 
>  void
> @@ -1249,7 +1253,7 @@ soidle(void *arg)
>   s = solock(so);
>   if (so->so_rcv.sb_flags & SB_SPLICE) {
>   so->so_error = ETIMEDOUT;
> - sounsplice(so, so->so_sp->ssp_socket, 1);
> + sounsplice(so, so->so_sp->ssp_socket, 0);
>   }
>   sounlock(so, s);
>  }
> @@ -1574,7 +1578,7 @@ somove(struct socket *so, int wait)
>   so->so_error = error;
>   if (((so->so_state & SS_CANTRCVMORE) && so->so_rcv.sb_cc == 0) ||
>   (sosp->so_state & SS_CANTSENDMORE) || maxreached || error) {
> - sounsplice(so, sosp, 1);
> + sounsplice(so, sosp, 0);
>   return (0);
>   }
>   if (timerisset(>so_idletv))
> @@ -1620,6 +1624,8 @@ sowwakeup(struct socket *so)
>  #ifdef SOCKET_SPLICE
>   if (so->so_snd.sb_flags & SB_SPLICE)
>   task_add(sosplice_taskq, >so_sp->ssp_soback->so_splicetask);
> + if (issplicedback(so))
> + return;
>  #endif
>   sowakeup(so, >so_snd);
>  }
> Index: share/man/man9/sosplice.9
> ===
> RCS file: /data/mirror/openbsd/cvs/src/share/man/man9/sosplice.9,v
> retrieving revision 1.9
> diff -u -p -r1.9 sosplice.9
> --- share/man/man9/sosplice.9 15 Aug 2018 12:10:49 -  1.9
> +++ share/man/man9/sosplice.9 3 Jul 2019 21:42:34 -
> @@ -206,10 +206,13 @@ will call
>  to trigger the transfer when new data or buffer space is available.
>  While socket splicing is active, any
>  .Xr read 2
> -from the source socket will block and the wakeup will not be delivered
> -to the file descriptor.
> -A read event or a socket error is signaled to userland after
> -dissolving.
> +from the source socket will block.
> +Neither read nor write wakeups will be delivered to the file
> 

socket splicing without kernel lock

2019-07-03 Thread Alexander Bluhm
Hi,

I would like to remove a useless kernel lock during socket splicing.

We have a socket "so" that splices data to socket "sosp".  Everytime
when space in sosp gets available, we add a task to move data from
so to sosp.  Additionally we call sowakeup() from sowwakeup().  I
have added this as it is possible to splice from so to sosp and
write from userland to sosp simultaneously.  In practice this does
not make sense as the data streams from two sources would get mixed.
Nothing uses this.  So it is not neccessay to inform userland that
it is possible to write.

Note that sowakeup() takes the kernel lock.  So when I do not call
it for a spliced socket, there is no kernel lock in the TCP splicing
path anymore.

But then I have to wakeup userland for the wirting socket in
sounsplice().

ok?

bluhm

Index: sys/kern/uipc_socket.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.231
diff -u -p -r1.231 uipc_socket.c
--- sys/kern/uipc_socket.c  17 Dec 2018 16:46:59 -  1.231
+++ sys/kern/uipc_socket.c  3 Jul 2019 13:56:09 -
@@ -220,9 +220,10 @@ sofree(struct socket *so, int s)
if (so->so_sp) {
if (issplicedback(so))
sounsplice(so->so_sp->ssp_soback, so,
-   so->so_sp->ssp_soback != so);
+   so->so_sp->ssp_soback == so ? 3 : 2);
if (isspliced(so))
-   sounsplice(so, so->so_sp->ssp_socket, 0);
+   sounsplice(so, so->so_sp->ssp_socket,
+   so == so->so_sp->ssp_socket ? 3 : 1);
}
 #endif /* SOCKET_SPLICE */
sbrelease(so, >so_snd);
@@ -1148,7 +1149,7 @@ sosplice(struct socket *so, int fd, off_
return (error);
}
if (so->so_sp->ssp_socket)
-   sounsplice(so, so->so_sp->ssp_socket, 1);
+   sounsplice(so, so->so_sp->ssp_socket, 0);
sbunlock(so, >so_rcv);
return (0);
}
@@ -1227,7 +1228,7 @@ sosplice(struct socket *so, int fd, off_
 }

 void
-sounsplice(struct socket *so, struct socket *sosp, int wakeup)
+sounsplice(struct socket *so, struct socket *sosp, int freeing)
 {
soassertlocked(so);

@@ -1236,8 +1237,11 @@ sounsplice(struct socket *so, struct soc
sosp->so_snd.sb_flags &= ~SB_SPLICE;
so->so_rcv.sb_flags &= ~SB_SPLICE;
so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
-   if (wakeup && soreadable(so))
+   /* Do not wakeup a socket that is about to be freed. */
+   if ((freeing & 1) == 0 && soreadable(so))
sorwakeup(so);
+   if ((freeing & 2) == 0 && sowriteable(sosp))
+   sowwakeup(sosp);
 }

 void
@@ -1249,7 +1253,7 @@ soidle(void *arg)
s = solock(so);
if (so->so_rcv.sb_flags & SB_SPLICE) {
so->so_error = ETIMEDOUT;
-   sounsplice(so, so->so_sp->ssp_socket, 1);
+   sounsplice(so, so->so_sp->ssp_socket, 0);
}
sounlock(so, s);
 }
@@ -1574,7 +1578,7 @@ somove(struct socket *so, int wait)
so->so_error = error;
if (((so->so_state & SS_CANTRCVMORE) && so->so_rcv.sb_cc == 0) ||
(sosp->so_state & SS_CANTSENDMORE) || maxreached || error) {
-   sounsplice(so, sosp, 1);
+   sounsplice(so, sosp, 0);
return (0);
}
if (timerisset(>so_idletv))
@@ -1620,6 +1624,8 @@ sowwakeup(struct socket *so)
 #ifdef SOCKET_SPLICE
if (so->so_snd.sb_flags & SB_SPLICE)
task_add(sosplice_taskq, >so_sp->ssp_soback->so_splicetask);
+   if (issplicedback(so))
+   return;
 #endif
sowakeup(so, >so_snd);
 }
Index: share/man/man9/sosplice.9
===
RCS file: /data/mirror/openbsd/cvs/src/share/man/man9/sosplice.9,v
retrieving revision 1.9
diff -u -p -r1.9 sosplice.9
--- share/man/man9/sosplice.9   15 Aug 2018 12:10:49 -  1.9
+++ share/man/man9/sosplice.9   3 Jul 2019 21:42:34 -
@@ -206,10 +206,13 @@ will call
 to trigger the transfer when new data or buffer space is available.
 While socket splicing is active, any
 .Xr read 2
-from the source socket will block and the wakeup will not be delivered
-to the file descriptor.
-A read event or a socket error is signaled to userland after
-dissolving.
+from the source socket will block.
+Neither read nor write wakeups will be delivered to the file
+descriptors.
+After dissolving, a read event or a socket error is signaled to
+userland on the source socket.
+If space is available, a write event will be signaled on the drain
+socket.
 .Sh RETURN VALUES
 .Fn sosplice
 returns 0 on success and otherwise the error number.