Support using a PID namespace to isolate the ebuild processes from
the host system, and to make it possible to kill them all easily
(similar to cgroups, but easier to use).

Bug: https://bugs.gentoo.org/659582
Signed-off-by: Michał Górny <mgo...@gentoo.org>
---
 lib/portage/const.py                   |  1 +
 lib/portage/package/ebuild/doebuild.py |  8 +++--
 lib/portage/process.py                 | 48 +++++++++++++++++++++++---
 man/make.conf.5                        |  7 ++++
 4 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/lib/portage/const.py b/lib/portage/const.py
index e0f93f7cc..ca66bc46e 100644
--- a/lib/portage/const.py
+++ b/lib/portage/const.py
@@ -174,6 +174,7 @@ SUPPORTED_FEATURES       = frozenset([
        "notitles",
        "parallel-fetch",
        "parallel-install",
+       "pid-sandbox",
        "prelink-checksums",
        "preserve-libs",
        "protect-owned",
diff --git a/lib/portage/package/ebuild/doebuild.py 
b/lib/portage/package/ebuild/doebuild.py
index e84a618d2..9917ac82c 100644
--- a/lib/portage/package/ebuild/doebuild.py
+++ b/lib/portage/package/ebuild/doebuild.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2018 Gentoo Foundation
+# Copyright 2010-2018 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
 from __future__ import unicode_literals
@@ -152,6 +152,7 @@ def _doebuild_spawn(phase, settings, actionmap=None, 
**kwargs):
        kwargs['networked'] = 'network-sandbox' not in settings.features or \
                phase in _networked_phases or \
                'network-sandbox' in settings['PORTAGE_RESTRICT'].split()
+       kwargs['pidns'] = 'pid-sandbox' in settings.features
 
        if phase == 'depend':
                kwargs['droppriv'] = 'userpriv' in settings.features
@@ -1482,7 +1483,7 @@ def _validate_deps(mysettings, myroot, mydo, mydbapi):
 # XXX Issue: cannot block execution. Deadlock condition.
 def spawn(mystring, mysettings, debug=False, free=False, droppriv=False,
        sesandbox=False, fakeroot=False, networked=True, ipc=True,
-       mountns=False, **keywords):
+       mountns=False, pidns=False, **keywords):
        """
        Spawn a subprocess with extra portage-specific options.
        Optiosn include:
@@ -1518,6 +1519,8 @@ def spawn(mystring, mysettings, debug=False, free=False, 
droppriv=False,
        @type ipc: Boolean
        @param mountns: Run this command inside mount namespace
        @type mountns: Boolean
+       @param pidns: Run this command in isolated PID namespace
+       @type pidns: Boolean
        @param keywords: Extra options encoded as a dict, to be passed to spawn
        @type keywords: Dictionary
        @rtype: Integer
@@ -1551,6 +1554,7 @@ def spawn(mystring, mysettings, debug=False, free=False, 
droppriv=False,
                keywords['unshare_net'] = not networked
                keywords['unshare_ipc'] = not ipc
                keywords['unshare_mount'] = mountns
+               keywords['unshare_pid'] = pidns
 
                if not networked and mysettings.get("EBUILD_PHASE") != 
"nofetch" and \
                        ("network-sandbox-proxy" in features or "distcc" in 
features):
diff --git a/lib/portage/process.py b/lib/portage/process.py
index 46868f442..dee126c3c 100644
--- a/lib/portage/process.py
+++ b/lib/portage/process.py
@@ -223,7 +223,8 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, 
returnpid=False,
           uid=None, gid=None, groups=None, umask=None, logfile=None,
           path_lookup=True, pre_exec=None,
           close_fds=(sys.version_info < (3, 4)), unshare_net=False,
-          unshare_ipc=False, unshare_mount=False, cgroup=None):
+          unshare_ipc=False, unshare_mount=False, unshare_pid=False,
+         cgroup=None):
        """
        Spawns a given command.
        
@@ -264,6 +265,8 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, 
returnpid=False,
        @param unshare_mount: If True, mount namespace will be unshared and 
mounts will
                be private to the namespace
        @type unshare_mount: Boolean
+       @param unshare_pid: If True, PID ns will be unshared from the spawned 
process
+       @type unshare_pid: Boolean
        @param cgroup: CGroup path to bind the process to
        @type cgroup: String
 
@@ -332,7 +335,7 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, 
returnpid=False,
        # This caches the libc library lookup in the current
        # process, so that it's only done once rather than
        # for each child process.
-       if unshare_net or unshare_ipc or unshare_mount:
+       if unshare_net or unshare_ipc or unshare_mount or unshare_pid:
                find_library("c")
 
        # Force instantiation of portage.data.userpriv_groups before the
@@ -348,7 +351,8 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, 
returnpid=False,
                        try:
                                _exec(binary, mycommand, opt_name, fd_pipes,
                                        env, gid, groups, uid, umask, pre_exec, 
close_fds,
-                                       unshare_net, unshare_ipc, 
unshare_mount, cgroup)
+                                       unshare_net, unshare_ipc, 
unshare_mount, unshare_pid,
+                                       cgroup)
                        except SystemExit:
                                raise
                        except Exception as e:
@@ -418,7 +422,8 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, 
returnpid=False,
        return 0
 
 def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask,
-       pre_exec, close_fds, unshare_net, unshare_ipc, unshare_mount, cgroup):
+       pre_exec, close_fds, unshare_net, unshare_ipc, unshare_mount, 
unshare_pid,
+       cgroup):
 
        """
        Execute a given binary with options
@@ -450,6 +455,8 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, 
groups, uid, umask,
        @param unshare_mount: If True, mount namespace will be unshared and 
mounts will
                be private to the namespace
        @type unshare_mount: Boolean
+       @param unshare_pid: If True, PID ns will be unshared from the spawned 
process
+       @type unshare_pid: Boolean
        @param cgroup: CGroup path to bind the process to
        @type cgroup: String
        @rtype: None
@@ -506,7 +513,7 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, 
groups, uid, umask,
                        f.write('%d\n' % os.getpid())
 
        # Unshare (while still uid==0)
-       if unshare_net or unshare_ipc or unshare_mount:
+       if unshare_net or unshare_ipc or unshare_mount or unshare_pid:
                filename = find_library("c")
                if filename is not None:
                        libc = LoadLibrary(filename)
@@ -514,6 +521,7 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, 
groups, uid, umask,
                                # from /usr/include/bits/sched.h
                                CLONE_NEWNS = 0x00020000
                                CLONE_NEWIPC = 0x08000000
+                               CLONE_NEWPID = 0x20000000
                                CLONE_NEWNET = 0x40000000
 
                                flags = 0
@@ -524,6 +532,9 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, 
groups, uid, umask,
                                if unshare_mount:
                                        # NEWNS = mount namespace
                                        flags |= CLONE_NEWNS
+                               if unshare_pid:
+                                       # we also need mount namespace for 
slave /proc
+                                       flags |= CLONE_NEWPID | CLONE_NEWNS
 
                                try:
                                        if libc.unshare(flags) != 0:
@@ -531,6 +542,15 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, 
groups, uid, umask,
                                                        
errno.errorcode.get(ctypes.get_errno(), '?')),
                                                        noiselevel=-1)
                                        else:
+                                               if unshare_pid:
+                                                       # pid namespace 
requires us to become init
+                                                       # TODO: do init-ty stuff
+                                                       # therefore, fork() ASAP
+                                                       fork_ret = os.fork()
+                                                       if fork_ret != 0:
+                                                               pid, status = 
os.waitpid(fork_ret, 0)
+                                                               assert pid == 
fork_ret
+                                                               os._exit(status)
                                                if unshare_mount:
                                                        # mark the whole 
filesystem as slave to avoid
                                                        # mounts escaping the 
namespace
@@ -541,6 +561,24 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, 
groups, uid, umask,
                                                                # TODO: should 
it be fatal maybe?
                                                                
writemsg("Unable to mark mounts slave: %d\n" % (mount_ret,),
                                                                        
noiselevel=-1)
+                                               if unshare_pid:
+                                                       # we need at least 
/proc being slave
+                                                       s = 
subprocess.Popen(['mount',
+                                                               '--make-slave', 
'/proc'])
+                                                       mount_ret = s.wait()
+                                                       if mount_ret != 0:
+                                                               # can't proceed 
with shared /proc
+                                                               
writemsg("Unable to mark /proc slave: %d\n" % (mount_ret,),
+                                                                       
noiselevel=-1)
+                                                               os._exit(1)
+                                                       # mount new /proc for 
our namespace
+                                                       s = 
subprocess.Popen(['mount',
+                                                               '-t', 'proc', 
'proc', '/proc'])
+                                                       mount_ret = s.wait()
+                                                       if mount_ret != 0:
+                                                               
writemsg("Unable to mount new /proc: %d\n" % (mount_ret,),
+                                                                       
noiselevel=-1)
+                                                               os._exit(1)
                                                if unshare_net:
                                                        # 'up' the loopback
                                                        IFF_UP = 0x1
diff --git a/man/make.conf.5 b/man/make.conf.5
index 7cb5741ad..de04e5e34 100644
--- a/man/make.conf.5
+++ b/man/make.conf.5
@@ -558,6 +558,13 @@ Use finer\-grained locks when installing packages, 
allowing for greater
 parallelization. For additional parallelization, disable
 \fIebuild\-locks\fR.
 .TP
+.B pid\-sandbox
+Isolate the process space for the ebuild processes. This makes it
+possible to cleanly kill all processes spawned by the ebuild.
+Supported only on Linux. Requires PID and mount namespace support
+in the kernel. /proc is remounted inside the mount namespace to
+account for the new PID namespace.
+.TP
 .B prelink\-checksums
 If \fBprelink\fR(8) is installed then use it to undo any prelinks on files
 before computing checksums for merge and unmerge. This feature is
-- 
2.19.1


Reply via email to