[PATCH] gccattributes.h: define gccattr_returns_twice

2021-06-14 Thread Jesse Young
Signed-off-by: Jesse Young 

---
 For some out-of-tree code that wraps clone(), using setjmp()/longjmp()
 making it behave more like fork().

 src/include/skalibs/gccattributes.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/include/skalibs/gccattributes.h 
b/src/include/skalibs/gccattributes.h
index 0584480..7892687 100644
--- a/src/include/skalibs/gccattributes.h
+++ b/src/include/skalibs/gccattributes.h
@@ -6,6 +6,7 @@
 #ifdef __GNUC__
 
 #define gccattr_noreturn __attribute__((__noreturn__))
+#define gccattr_returns_twice __attribute__((__returns_twice__))
 #define gccattr_noinline __attribute__((__noinline__))
 #define gccattr_inline __attribute__((__always_inline__))
 #define gccattr_const __attribute__((__const__))
@@ -31,6 +32,7 @@
 #else
 
 #define gccattr_noreturn
+#define gccattr_returns_twice
 #define gccattr_noinline
 #define gccattr_inline
 #define gccattr_const
@@ -46,6 +48,7 @@
 
 #ifdef GCCATTR_COMPAT_0_22
 #define _a_noreturn gccattr_noreturn
+#define _a_returns_twice gccattr_returns_twice
 #define _a_noinline gccattr_noinline
 #define _a_inline gccattr_inline
 #define _a_const gccattr_const
-- 
2.32.0



[PATCH 2/2] doc: allreadwrite: document scatter/gatter functions

2021-06-13 Thread Jesse Young
---
 Document scatter/gatter functions as well as unsanitize_read() in the
 allreadwrite.h header.

 doc/libstddjb/allreadwrite.html| 53 ++
 src/include/skalibs/allreadwrite.h |  5 ++-
 2 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/doc/libstddjb/allreadwrite.html b/doc/libstddjb/allreadwrite.html
index 2619153..1c31022 100644
--- a/doc/libstddjb/allreadwrite.html
+++ b/doc/libstddjb/allreadwrite.html
@@ -51,6 +51,18 @@ and
 https://www.opengroup.org/onlinepubs/9699919799/functions/write.html;>write().
 
 
+
+ typedef ssize_t iovfunc_t (int fd, struct iovec const *v, unsigned int 
vlen)  
+This is the simplified type of IO functions such as
+https://www.opengroup.org/onlinepubs/9699919799/functions/readv.html;>readv()
+and
+https://www.opengroup.org/onlinepubs/9699919799/functions/writev.html;>writev(),
+where the content to perform IO on is specified as a
+scatter/gather array of vlen
+elements instead of a single string.
+
+
+
 
  typedef size_t alliofunc_t (int fd, char *buf, size_t len)  
 This is the type of an IO operation that expects all of its
@@ -82,6 +94,11 @@ semantics are appropriate, so EPIPE is a good candidate to 
signal EOF
 on reading.)
 
 
+
+ ssize_t unsanitize_read (ssize_t r)  
+Returns the inverse of sanitize_read.
+
+
 
  size_t allreadwrite (iofunc_t *f, int fd, char *s, size_t len)  

 *f must be a basic reading or writing function such as
@@ -94,6 +111,14 @@ blocking mode; if fd is in non-blocking mode, it 
might
 set errno to EWOULDBLOCK or EAGAIN.
 
 
+
+ size_t allreadwritev (iovfunc_t *f, int fd, struct iovec const *v, 
unsigned int vlen)  
+Like  allreadwrite 
+but the content to perform IO on is specified as a
+scatter/gather array of vlen
+elements instead of a single string.
+
+
 
  ssize_t fd_read (int fd, char *s, size_t len)  
 Safe wrapper around the
@@ -107,6 +132,20 @@ function.
 https://www.opengroup.org/onlinepubs/9699919799/functions/write.html;>write()
 function.
 
+
+
+ ssize_t fd_readv (int fd, struct iovec const *v, unsigned int vlen) 
 
+Safe wrapper around the
+https://www.opengroup.org/onlinepubs/9699919799/functions/readv.html;>readv()
+function.
+
+
+
+ ssize_t fd_writev (int fd, struct iovec const *v, unsigned int vlen) 
 
+Safe wrapper around the
+https://www.opengroup.org/onlinepubs/9699919799/functions/writev.html;>writev()
+function.
+
 
 
  ssize_t fd_recv (int fd, char *s, size_t len, unsigned int flags) 
 
@@ -138,5 +177,19 @@ around fd_write() if necessary, until either 
len bytes are
 written or an error occurs.
 
 
+
+ size_t allreadv (int fd, struct iovec *v, unsigned int vlen)  

+Like allread, but the bytes from fd are read into a
+scatter/gather array of vlen
+elements instead of a single string.
+
+
+
+ size_t allwritev (int fd, struct iovec const *v, unsigned int vlen) 
 
+Like allwrite, but the content to write is taken from a
+scatter/gather array of vlen
+elements instead of a single string.
+
+
 
 
diff --git a/src/include/skalibs/allreadwrite.h 
b/src/include/skalibs/allreadwrite.h
index 254d766..3df1ed1 100644
--- a/src/include/skalibs/allreadwrite.h
+++ b/src/include/skalibs/allreadwrite.h
@@ -14,6 +14,8 @@ extern size_t allreadwritev (iovfunc_t_ref, int, struct iovec 
const *, unsigned
 
 extern ssize_t fd_read (int, char *, size_t) ;
 extern ssize_t fd_write (int, char const *, size_t) ;
+extern ssize_t fd_readv (int, struct iovec const *, unsigned int) ;
+extern ssize_t fd_writev (int, struct iovec const *, unsigned int) ;
 
 extern ssize_t fd_recv (int, char *, size_t, unsigned int) ;
 extern ssize_t fd_send (int, char const *, size_t, unsigned int) ;
@@ -23,7 +25,4 @@ extern size_t allwrite (int, char const *, size_t) ;
 extern size_t allreadv (int, struct iovec const *, unsigned int) ;
 extern size_t allwritev (int, struct iovec const *, unsigned int) ;
 
-extern ssize_t fd_readv (int, struct iovec const *, unsigned int) ;
-extern ssize_t fd_writev (int, struct iovec const *, unsigned int) ;
-
 #endif
-- 
2.32.0



[PATCH 1/2] doc: reconcile openwrite{,v}nclose_suffix{,_devino}{,_sync} with djbunix.h

2021-06-13 Thread Jesse Young
---
 Hello,

 I've been working with the some of the scatter/gather I/O functions
 and noticed the documentation duplicated the non-vectored names. On
 closer inspection, I also discovered the devino variants have their
 arguments rearranged (except for *_internal).

 Cheers,
 Jesse

 doc/libstddjb/djbunix.html | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/doc/libstddjb/djbunix.html b/doc/libstddjb/djbunix.html
index 95f3777..ce34d25 100644
--- a/doc/libstddjb/djbunix.html
+++ b/doc/libstddjb/djbunix.html
@@ -560,8 +560,8 @@ The function returns 1 if it succeeds, or 0 (and sets 
errno) if it fails.
 
  int openwritenclose_suffix (char const *file, char const *s, size_t 
len, char const *suffix) 
 int openwritenclose_suffix_sync (char const *file, char const *s, size_t len, 
char const *suffix) 
-int openwritenclose_suffix_devino (char const *file, char const *s, size_t 
len, dev_t *dev, ino_t *ino, char const *suffix) 
-int openwritenclose_suffix_devino_sync (char const *file, char const *s, 
size_t len, dev_t *dev, ino_t *ino, char const *suffix)  
+int openwritenclose_suffix_devino (char const *file, char const *s, size_t 
len, char const *suffix, dev_t *dev, ino_t *ino) 
+int openwritenclose_suffix_devino_sync (char const *file, char const *s, 
size_t len, char const *suffix, dev_t *dev, ino_t *ino)  
 Trivial shortcuts around openwritenclose_suffix_internal(). The
 reader can easily figure out what they do.
 
@@ -591,10 +591,10 @@ elements instead of a single string.
 
 
 
- int openwritenclose_suffix (char const *file, char const *s, size_t 
len, char const *suffix) 
-int openwritenclose_suffix_sync (char const *file, char const *s, size_t len, 
char const *suffix) 
-int openwritenclose_suffix_devino (char const *file, char const *s, size_t 
len, dev_t *dev, ino_t *ino, char const *suffix) 
-int openwritenclose_suffix_devino_sync (char const *file, char const *s, 
size_t len, dev_t *dev, ino_t *ino, char const *suffix)  
+ int openwritevnclose_suffix (char const *file, struct iovec const *v, 
unsigned int vlen, char const *suffix) 
+int openwritevnclose_suffix_sync (char const *file, struct iovec const *v, 
unsigned int vlen, char const *suffix) 
+int openwritevnclose_suffix_devino (char const *file, struct iovec const *v, 
unsigned int vlen, char const *suffix, dev_t *dev, ino_t *ino) 
+int openwritevnclose_suffix_devino_sync (char const *file, struct iovec const 
*v, unsigned int vlen, char const *suffix, dev_t *dev, ino_t *ino)  
 Trivial wrappers around openwritevnclose_suffix_internal().
 
 
-- 
2.32.0



Re: [PATCH] s6-supervise: Optionally run child in a new pid namespace

2017-07-16 Thread Jesse Young
On Sun, 16 Jul 2017 08:41:03 +
"Laurent Bercot"  wrote:

>   As I told Jesse on IRC, the patch isn't going in. I'm not including
> OS-specific code into s6, even with a compile-time option. The main
> reason for it is that it changes the API: the choice to spawn the
> service in a new namespace or not should be made at run time, so
> it would introduce a new file in the service directory that would only
> be valid under Linux, and the file would need to be supported by
> s6-rc and friends even on other systems, etc. This is exactly the kind
> of complexity created by OS divergences that plagues the Unix world
> and that I very much want to avoid. This change itself looks quite
> simple, but it would be a precedent and the slope is extremely
> slippery.
> 
> 
> >Though as Jesse explained, this requires some sort of exit/signal
> >proxing, which isn't the case here. Here the direct child of
> >s6-supervise remains the daemon itself - in its own pid ns - which is
> >much better.  
>   It would unarguably be more elegant, yes, but since there's a way to
> do without it, it's only about elegance, not feasability - and I
> really think the cost for elegance is too high.
>
>   execline's 'trap' binary can adequately perform the needed proxying
> at low resource cost.

There's a reliability issue as well, one thing trap won't be able to
do is relay KILL and other uncatchable signals. The proxy and the
service aren't completely sharing the same fate as each other, which
may break users expectations when they run, e.g. s6-svc -k. This fate
sharing mismatch is similar to how s6-sudod may keep running despite
s6-sudoc's death.

To reliably deliver a KILL signal to the service without re-introducing
pid races, the proxy would need a non-fatal way to know to send the
signal, requiring a new API. Improving on execline's trap program
to handle this would probably transform it into a version of daemontools
supervise that exits rather than respawns.

If there were no interposing process, an added benefit is that killing
the pid 1 in a namespace also kills all the other processes in that
namespace. This is a secondary thought, but we get it for free. Using
pid namespaces isn't necessary to reliably kill a set of processes on
Linux, another technique is to use a cgroup freezer, which essentially
puts a write lock on a set of processes, so that an unrelated process
can send signals without the TOCTOU races.

>   If more various namespace feature requests come in, at some point I
> will look for a way to integrate some namespace functions into
> skalibs, with proper compile-time guards and stubs, and then
> reconsider; but as long as there are ways to achieve the desired
> outcome with external tools, it's not a priority.

Unsharing everything, except for the pid namespace, should take effect
immediately on the calling process. CLONE_NEWNS is special since the
pid 1 effect gets applied to the process' first child, rather than the
process itself. This means the unshare tool from util-linux should be
able to do everything else in the usual exec chainloading style.

Alternative to patching s6-supervise, what do you think about adding an
option to spawn a different supervisor from s6-svscan? An example API
could be, s6-svscan will spawn "./supervisor" for each service dir it
tracks, if it doesn't exist, it just spawns s6-supervise as usual.

Note that having ./supervisor consist of "unshare -p s6-supervise" will
prevent s6-supervise from re-spawning its supervised child. fork() will
succeed the first time, but error with ENOMEM on subsequent calls unless
unshare(CLONE_NEWPID) is called again. In fact, the patch could
probably avoid refactoring the child into a continuation by doing
unshare() ; fork() ; rather than clone().


[PATCH] s6-supervise: Optionally run child in a new pid namespace

2017-07-15 Thread Jesse Young
This patch modifies s6-supervise to use the Linux specific clone()
system call to enable the child process to become the pid 1 of a new
pid namespace. To enable it, compile with -DWANT_CLONE_NEWPID and make
the ./clone-newpid file readable to s6-supervise in the desired service
directories.

I ask that this be included in s6-supervise.c because doing
unshare(CLONE_NEWPID) in the child process doesn't change the process's
pid to 1. Rather, it runs the next spawned child as pid 1. After
spawning that first process, the parent is prevented from spawning any
future children, subsequent attempts will fail with ENOMEM. Changing
s6-supervise to use clone() avoids these limitations as well as avoiding
extending the supervision chain, which would make exit/signal proxying
necessary.

To see correct ps output, /proc needs to be remounted. To avoid
conflicts with the parent pid namespace's /proc, this is done in a new
mount namespace.

For example:
#!/bin/execlineb -P
unshare -m -- 
foreground { umount /proc }
if -- { mount -t proc proc /proc }
exec ...

The functions added in this patch could be migrated into skalibs or
libs6, but I wanted to start with this as a PoC without making API
changes.

Jesse

---
 src/supervision/s6-supervise.c | 87
+++--- 1 file changed, 65
insertions(+), 22 deletions(-)

diff --git a/src/supervision/s6-supervise.c
b/src/supervision/s6-supervise.c index 2e8fa38..7605a82 100644
--- a/src/supervision/s6-supervise.c
+++ b/src/supervision/s6-supervise.c
@@ -9,6 +9,9 @@
 #include 
 #include 
 #include 
+#ifdef WANT_CLONE_NEWPID
+#  include 
+#endif
 #include 
 #include 
 #include 
@@ -203,6 +206,67 @@ static int maybesetsid (void)
   return 1 ;
 }
 
+static void exec_run(int p[2], int notifyp[2], int fd)
gccattr_noreturn ; +static void exec_run(int p[2], int notifyp[2], int
fd) +{
+  char const *cargv[2] = { "run", 0 } ;
+  PROG = "s6-supervise (child)" ;
+  selfpipe_finish() ;
+  if (notifyp[0] >= 0) close(notifyp[0]) ;
+  close(p[0]) ;
+  if (notifyp[1] >= 0 && fd_move(fd, notifyp[1]) < 0)
+  {
+failcoe(p[1]) ;
+strerr_diefu1sys(127, "move notification descriptor") ;
+  }
+  if (!maybesetsid())
+  {
+failcoe(p[1]) ;
+strerr_diefu1sys(127, "access ./nosetsid") ;
+  }
+  execve("./run", (char *const *)cargv, (char *const *)environ) ;
+  failcoe(p[1]) ;
+  strerr_dieexec(127, "run") ;
+}
+
+static pid_t spawn_run_fork(int p[2], int notifyp[2], int fd)
+{
+  pid_t pid = fork() ;
+  if (!pid) exec_run(p, notifyp, fd) ;
+  return pid ;
+}
+
+#ifdef WANT_CLONE_NEWPID
+typedef struct
+{
+  int p[2] ;
+  int notifyp[2] ;
+  int fd ;
+} exec_run_t ;
+
+static int exec_run_shim(void *ctx) gccattr_noreturn ;
+static int exec_run_shim(void *ctx)
+{
+  exec_run_t *er = (exec_run_t *) ctx ;
+  exec_run(er->p, er->notifyp, er->fd) ;
+}
+
+static pid_t spawn_run(int p[2], int notifyp[2], int fd)
+{
+  exec_run_t arg = { { p[0], p[1] }, { notifyp[0], notifyp[1] }, fd } ;
+  char child_stack[SIGSTKSZ] ;
+  if (access("clone-newpid", F_OK) < 0 && errno == ENOENT)
+return spawn_run_fork(p, notifyp, fd) ;
+  return (pid_t) clone(_run_shim, child_stack +
sizeof(child_stack),
+  CLONE_NEWPID | SIGCHLD, ) ;
+}
+#else /* if !defined(WANT_CLONE_NEWPID) */
+static pid_t spawn_run(int p[2], int notifyp[2], int fd)
+{
+  return spawn_run_fork(p, notifyp, fd) ;
+}
+#endif /* defined(WANT_CLONE_NEWPID) */
+
 static void trystart (void)
 {
   int p[2] ;
@@ -222,7 +286,7 @@ static void trystart (void)
 fd_close(p[1]) ; fd_close(p[0]) ;
 return ;
   }
-  pid = fork() ;
+  pid = spawn_run(p, notifyp, (int)fd) ;
   if (pid < 0)
   {
 settimeout(60) ;
@@ -232,27 +296,6 @@ static void trystart (void)
 fd_close(p[1]) ; fd_close(p[0]) ;
 return ;
   }
-  else if (!pid)
-  {
-char const *cargv[2] = { "run", 0 } ;
-PROG = "s6-supervise (child)" ;
-selfpipe_finish() ;
-if (notifyp[0] >= 0) close(notifyp[0]) ;
-close(p[0]) ;
-if (notifyp[1] >= 0 && fd_move((int)fd, notifyp[1]) < 0)
-{
-  failcoe(p[1]) ;
-  strerr_diefu1sys(127, "move notification descriptor") ;
-}
-if (!maybesetsid())
-{
-  failcoe(p[1]) ;
-  strerr_diefu1sys(127, "access ./nosetsid") ;
-}
-execve("./run", (char *const *)cargv, (char *const *)environ) ;
-failcoe(p[1]) ;
-strerr_dieexec(127, "run") ;
-  }
   if (notifyp[1] >= 0) fd_close(notifyp[1]) ;
   fd_close(p[1]) ;
   {
-- 
2.13.1


Re: [PATCH] socket_local46: Make room on the stack for IPv6 sockaddrs

2015-07-26 Thread Jesse Young
On Sun, 26 Jul 2015 21:29:31 +0200
Laurent Bercot ska-skaw...@skarnet.org wrote:

 On 26/07/2015 21:15, Jesse Young wrote:
  byte_copy() reads past the end of the sockaddr structure because it
  isn't sufficiently large enough to handle sockaddr_in6 addresses
  resulting in undefined behavior. armv6-alpine-linux-muslgnueabihf-gcc
  5.1.0-r0 generates the UDF instruction in this case, causing programs
  to SIGILL.
 
   Ah, good catch. sockaddr used to work, but it's stricto sensu incorrect -
 and musl is very touchy with standards.
   I believe the right fix is to use sockaddr_storage, though. I'll commit
 a fix tomorrow.
   Thanks !
 

I don't think sockaddr_storage is strictly necessary in this instance.
getsockname() will truncate its result, but socket_local46() won't
use that result unless sa_family is AF_INET or AF_INET6. That being
said, the only penalty to using sockaddr_storage is the extra space, and
it'd be more robust in cases of buggy implementations of getsockname().

Jesse


[PATCH] socket_local46: Make room on the stack for IPv6 sockaddrs

2015-07-26 Thread Jesse Young
byte_copy() reads past the end of the sockaddr structure because it
isn't sufficiently large enough to handle sockaddr_in6 addresses
resulting in undefined behavior. armv6-alpine-linux-muslgnueabihf-gcc
5.1.0-r0 generates the UDF instruction in this case, causing programs
to SIGILL.
---
 src/libstddjb/socket_local46.c | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/libstddjb/socket_local46.c b/src/libstddjb/socket_local46.c
index 448160c..611eb37 100644
--- a/src/libstddjb/socket_local46.c
+++ b/src/libstddjb/socket_local46.c
@@ -12,21 +12,23 @@
 
 int socket_local46 (int s, ip46_t *ip, uint16 *port)
 {
-  struct sockaddr sa ;
+  union {
+struct sockaddr_in6 sa6 ;
+struct sockaddr_in sa4 ;
+struct sockaddr sa ;
+  } sa ;
   socklen_t dummy = sizeof sa ;
-  if (getsockname(s, sa, dummy)  0) return -1 ;
-  if (sa.sa_family == AF_INET6)
+  if (getsockname(s, sa.sa, dummy)  0) return -1 ;
+  if (sa.sa.sa_family == AF_INET6)
   {
-register struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa ;
-byte_copy(ip-ip, 16, (char const *)sa6-sin6_addr.s6_addr) ;
-uint16_unpack_big((char *)sa6-sin6_port, port) ;
+byte_copy(ip-ip, 16, (char const *)sa.sa6.sin6_addr.s6_addr) ;
+uint16_unpack_big((char *)sa.sa6.sin6_port, port) ;
 ip-is6 = 1 ;
   }
-  else if (sa.sa_family == AF_INET)
+  else if (sa.sa.sa_family == AF_INET)
   {
-register struct sockaddr_in *sa4 = (struct sockaddr_in *)sa ;
-byte_copy(ip-ip, 4, (char const *)sa4-sin_addr.s_addr) ;
-uint16_unpack_big((char *)sa4-sin_port, port) ;
+byte_copy(ip-ip, 4, (char const *)sa.sa4.sin_addr.s_addr) ;
+uint16_unpack_big((char *)sa.sa4.sin_port, port) ;
 ip-is6 = 0 ;
   }
   else return (errno = EAFNOSUPPORT, -1) ;
-- 
2.4.6