guix_mirror_bot pushed a commit to branch master
in repository guix.

commit 1eccea7ffb7eac43670d5fd76e8afa8ecfe6b0b9
Author: Maxim Cournoyer <[email protected]>
AuthorDate: Fri Oct 17 23:12:27 2025 +0900

    build/syscalls: Introduce new safe-clone and use it.
    
    * guix/build/syscalls.scm (without-automatic-finalization): Accept multiple
    expressions.
    (without-garbage-collection): New syntax.
    (without-threads): Likewise.
    (ensure-signal-delivery-thread, safe-clone): New procedures.
    * tests/syscalls.scm: ("clone and unshare triggers EINVAL")
    ("safe-clone and unshare succeeds"): New tests.
    * gnu/build/linux-container.scm (run-container): Adjust to use 'safe-clone'.
    
    Relates-to: #1169
    Change-Id: I044c11a899e24e547a7aed97f30c8e7250ab5363
---
 gnu/build/linux-container.scm | 181 ++++++++++++++++++++----------------------
 guix/build/syscalls.scm       |  53 ++++++++++++-
 tests/syscalls.scm            |  36 ++++++++-
 3 files changed, 172 insertions(+), 98 deletions(-)

diff --git a/gnu/build/linux-container.scm b/gnu/build/linux-container.scm
index 25890ec0a1..ff5449d0b0 100644
--- a/gnu/build/linux-container.scm
+++ b/gnu/build/linux-container.scm
@@ -263,100 +263,93 @@ that host UIDs (respectively GIDs) map to in the 
namespace."
   ;; child process blocks until the parent writes to it.
   (match (socketpair PF_UNIX (logior SOCK_CLOEXEC SOCK_STREAM) 0)
     ((child . parent)
-     (let ((flags (namespaces->bit-mask namespaces)))
-       (match (clone flags)
-         (0
-          ;; Inhibit thread creation until after the unshare call.
-          (gc-disable)
-          (call-with-clean-exit
-           (lambda ()
-             (close-port parent)
-             ;; Wait for parent to set things up.
-             (match (read child)
-               ('ready
-                (purify-environment)
-                (when (and (memq 'mnt namespaces)
-                           (not (string=? root "/")))
-                  (catch #t
-                    (lambda ()
-                      (mount-file-systems root mounts
-                                          #:mount-/proc? (memq 'pid namespaces)
-                                          #:mount-/sys?  (memq 'net
-                                                               namespaces)
-                                          #:populate-file-system
-                                          (lambda ()
-                                            (populate-file-system)
-                                            (when (and (memq 'net namespaces)
-                                                       loopback-network?)
-                                              (set-network-interface-up "lo")
-
-                                              ;; When isolated from the
-                                              ;; network, provide a minimal
-                                              ;; /etc/hosts to resolve
-                                              ;; "localhost".
-                                              (mkdir-p "/etc")
-                                              (call-with-output-file 
"/etc/hosts"
-                                                (lambda (port)
-                                                  (display "127.0.0.1 
localhost\n" port)
-                                                  (chmod port #o444)))))
-                                          #:writable-root?
-                                          (or writable-root?
-                                              (not (memq 'mnt namespaces)))))
-                    (lambda args
-                      ;; Forward the exception to the parent process.
-                      ;; FIXME: SRFI-35 conditions and non-trivial objects
-                      ;; cannot be 'read' so they shouldn't be written as is.
-                      (write args child)
-                      (primitive-exit 3))))
-
-                (when (and lock-mounts?
-                           (memq 'mnt namespaces)
-                           (memq 'user namespaces))
-                  ;; Create a new mount namespace owned by a new user
-                  ;; namespace to "lock" together previous mounts, such that
-                  ;; they cannot be unmounted or remounted separately--see
-                  ;; mount_namespaces(7).
-                  ;;
-                  ;; Note: at this point, the process is single-threaded (no
-                  ;; GC mark threads, no finalization thread, etc.) which is
-                  ;; why unshare(CLONE_NEWUSER) can be used.
-                  (let ((uid (getuid)) (gid (getgid)))
-                    (unshare (logior CLONE_NEWUSER CLONE_NEWNS))
-                    (gc-enable)
-                    (when (file-exists? "/proc/self")
-                      (initialize-user-namespace (getpid)
-                                                 host-uids
-                                                 #:host-uid uid
-                                                 #:host-gid gid
-                                                 #:guest-uid guest-uid
-                                                 #:guest-gid guest-gid))))
-
-                ;; TODO: Manage capabilities.
-                (write 'ready child)
-                (close-port child)
-                (thunk))
-               (_                                 ;parent died or something
-                (primitive-exit 2))))))
-         (pid
-          (close-port child)
-          (when (memq 'user namespaces)
-            (initialize-user-namespace pid host-uids
-                                       #:guest-uid guest-uid
-                                       #:guest-gid guest-gid))
-          ;; TODO: Initialize cgroups.
-          (write 'ready parent)
-          (newline parent)
-
-          ;; Check whether the child process' setup phase succeeded.
-          (let ((message (read parent)))
-            (close-port parent)
-            (match message
-              ('ready                             ;success
-               pid)
-              (((? symbol? key) args ...)         ;exception
-               (apply throw key args))
-              (_                                  ;unexpected termination
-               #f)))))))))
+     (safe-clone
+      (namespaces->bit-mask namespaces)
+      (lambda ()
+        (call-with-clean-exit
+         (lambda ()
+           (close-port parent)
+           ;; Wait for parent to set things up.
+           (match (read child)
+             ('ready
+              (purify-environment)
+              (when (and (memq 'mnt namespaces)
+                         (not (string=? root "/")))
+                (catch #t
+                  (lambda ()
+                    (mount-file-systems root mounts
+                                        #:mount-/proc? (memq 'pid namespaces)
+                                        #:mount-/sys?  (memq 'net
+                                                             namespaces)
+                                        #:populate-file-system
+                                        (lambda ()
+                                          (populate-file-system)
+                                          (when (and (memq 'net namespaces)
+                                                     loopback-network?)
+                                            (set-network-interface-up "lo")
+
+                                            ;; When isolated from the
+                                            ;; network, provide a minimal
+                                            ;; /etc/hosts to resolve
+                                            ;; "localhost".
+                                            (mkdir-p "/etc")
+                                            (call-with-output-file "/etc/hosts"
+                                              (lambda (port)
+                                                (display "127.0.0.1 
localhost\n" port)
+                                                (chmod port #o444)))))
+                                        #:writable-root?
+                                        (or writable-root?
+                                            (not (memq 'mnt namespaces)))))
+                  (lambda args
+                    ;; Forward the exception to the parent process.
+                    ;; FIXME: SRFI-35 conditions and non-trivial objects
+                    ;; cannot be 'read' so they shouldn't be written as is.
+                    (write args child)
+                    (primitive-exit 3))))
+
+              (when (and lock-mounts?
+                         (memq 'mnt namespaces)
+                         (memq 'user namespaces))
+                ;; Create a new mount namespace owned by a new user
+                ;; namespace to "lock" together previous mounts, such that
+                ;; they cannot be unmounted or remounted separately--see
+                ;; mount_namespaces(7).
+                (let ((uid (getuid)) (gid (getgid)))
+                  (unshare (logior CLONE_NEWUSER CLONE_NEWNS))
+                  (when (file-exists? "/proc/self")
+                    (initialize-user-namespace (getpid)
+                                               host-uids
+                                               #:host-uid uid
+                                               #:host-gid gid
+                                               #:guest-uid guest-uid
+                                               #:guest-gid guest-gid))))
+
+              ;; TODO: Manage capabilities.
+              (write 'ready child)
+              (close-port child)
+              (thunk))
+             (_                         ;parent died or something
+              (primitive-exit 2))))))
+      (lambda (pid)
+        (close-port child)
+        (when (memq 'user namespaces)
+          (initialize-user-namespace pid host-uids
+                                     #:guest-uid guest-uid
+                                     #:guest-gid guest-gid))
+        ;; TODO: Initialize cgroups.
+        (write 'ready parent)
+        (newline parent)
+
+        ;; Check whether the child process' setup phase succeeded.
+        (let ((message (read parent)))
+          (close-port parent)
+          (match message
+            ('ready                     ;success
+             pid)
+            (((? symbol? key) args ...) ;exception
+             (apply throw key args))
+            (_                          ;unexpected termination
+             #f))))))))
 
 ;; FIXME: This is copied from (guix utils), which we cannot use because it
 ;; would pull (guix config) and all.
diff --git a/guix/build/syscalls.scm b/guix/build/syscalls.scm
index 3106e4e3d6..d40b1ae5d9 100644
--- a/guix/build/syscalls.scm
+++ b/guix/build/syscalls.scm
@@ -150,6 +150,7 @@
             CLONE_THREAD
             CLONE_VM
             clone
+            safe-clone
             unshare
             setns
             get-user-ns
@@ -1170,17 +1171,45 @@ caller lacks root privileges."
 Turning finalization off shuts down the finalization thread as a side effect."
       (->bool ((force proc) (if enabled? 1 0))))))
 
-(define-syntax-rule (without-automatic-finalization exp)
-  "Turn off automatic finalization within the dynamic extent of EXP."
+(define-syntax-rule (without-automatic-finalization body ...)
+  "Turn off automatic finalization within the dynamic extent of BODY.  This is
+useful to ensure there is no finalization thread."
   (let ((enabled? #t))
     (dynamic-wind
       (lambda ()
         (set! enabled? (%set-automatic-finalization-enabled?! #f)))
       (lambda ()
-        exp)
+        body ...)
       (lambda ()
         (%set-automatic-finalization-enabled?! enabled?)))))
 
+(define-syntax-rule (without-garbage-collection body ...)
+  "Turn off garbage collection within the dynamic extent of BODY.  This is 
useful
+to avoid the creation new garbage collection thread.  Note that pre-existing
+GC marker threads are only disabled, not terminated."
+  (dynamic-wind
+    (lambda ()
+      (gc-disable))
+    (lambda ()
+      body ...)
+    (lambda ()
+      (gc-enable))))
+
+(define-syntax-rule (without-threads body ...)
+  "Ensure the Guile finalizer thread is stopped and that garbage collection 
does
+not run.  Note that pre-existing GC marker threads are only disabled, not
+terminated.  This also leaves the signal handling thread to be disabled by
+another means, since there is no Guile API to do so."
+  ;; Note: the three kind of threads that Guile can spawn are the finalization
+  ;; thread, the signal thread, or the GC marker threads.
+  (without-automatic-finalization
+   (without-garbage-collection body ...)))
+
+(define (ensure-signal-delivery-thread)
+  "Ensure the signal delivery thread is spawned and its state set
+ to 'RUNNING'.  This is valid as of the implementation as of Guile 3.0.9."
+  (sigaction SIGUSR1))                  ;could be any signal
+
 ;; The libc interface to sys_clone is not useful for Scheme programs, so the
 ;; low-level system call is wrapped instead.  The 'syscall' function is
 ;; declared in <unistd.h> as a variadic function; in practice, it expects 6
@@ -1223,6 +1252,24 @@ are shared between the parent and child processes."
                    (list err))
             ret)))))
 
+(define (safe-clone flags child parent)
+  "This is a raw clone syscall wrapper that ensures no Guile thread will be
+spawned during execution of the child.  `clone' is called with FLAGS.  CHILD
+is a thunk to run in the child process.  PARENT is procedure that accepts the
+child PID as argument.  This is useful in many contexts, such as when calling
+`unshare' or async-unsafe procedures in the child when the parent process
+memory (CLONE_VM) or threads (CLONE_THREAD) are shared with it."
+  ;; TODO: Contribute `clone' to Guile, and handle these complications there,
+  ;; similarly to how it's handled for scm_fork in posix.c.
+
+  ;; XXX: This is a hack: as of Guile 3.0.9, by starting the signal delivery
+  ;; thread in the parent, its state will be known as RUNNING, and the child
+  ;; won't attempt to start it itself.
+  (ensure-signal-delivery-thread)
+  (match (clone flags)
+    (0   (without-threads (child)))
+    (pid (parent pid))))
+
 (define (thread-count)
   "Return the complete thread count of the current process.  Unlike
 `all-threads', this also counts the Guile signal delivery, and finalizer
diff --git a/tests/syscalls.scm b/tests/syscalls.scm
index 879c3e4f25..a0483e68f0 100644
--- a/tests/syscalls.scm
+++ b/tests/syscalls.scm
@@ -3,6 +3,7 @@
 ;;; Copyright © 2015 David Thompson <[email protected]>
 ;;; Copyright © 2020 Simon South <[email protected]>
 ;;; Copyright © 2020 Mathieu Othacehe <[email protected]>
+;;; Copyright © 2025 Maxim Cournoyer <[email protected]>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -29,7 +30,8 @@
   #:use-module (srfi srfi-71)
   #:use-module (system foreign)
   #:use-module ((ice-9 ftw) #:select (scandir))
-  #:use-module (ice-9 match))
+  #:use-module (ice-9 match)
+  #:use-module (ice-9 threads))
 
 ;; Test the (guix build syscalls) module, although there's not much that can
 ;; actually be tested without being root.
@@ -158,6 +160,38 @@
     (lambda args
       (system-error-errno args))))
 
+(define child-thunk
+  (lambda ()
+    (gc)                                ;spawn GC threads
+    (primitive-exit
+     (catch 'system-error
+       (lambda ()
+         (unshare CLONE_THREAD)
+         0)                             ;no error
+       (lambda args
+         (system-error-errno args))))))
+
+(define parent-proc
+  (lambda (pid)
+    (match (waitpid pid)
+       ((_ . status)
+        (status:exit-val status)))))
+
+(unless perform-container-tests?
+  (test-skip 1))
+(test-equal "clone and unshare triggers EINVAL"
+  EINVAL
+  (match (clone (logior CLONE_NEWUSER SIGCHLD))
+    (0   (child-thunk))
+    (pid (parent-proc pid))))
+
+(unless perform-container-tests?
+  (test-skip 1))
+(test-equal "safe-clone and unshare succeeds"
+  0
+  (safe-clone (logior CLONE_NEWUSER SIGCHLD)
+              child-thunk parent-proc))
+
 (unless perform-container-tests?
   (test-skip 1))
 (test-assert "setns"

Reply via email to