From 3b8880f86d9420528d7baadd17859c7aafdb1124 Mon Sep 17 00:00:00 2001
From: Jay Faulkner <jay@jvf.cc>
Date: Fri, 20 Feb 2015 21:59:47 +0000
Subject: [PATCH] nspawn: Map all seccomp filters to capabilities

This change makes it so all seccomp filters are mapped
to the appropriate capability and are only added if that
capability was not requested when running the container.

This unbreaks the remaining use cases broken by the
addition of seccomp filters without respecting requested
capabilities.
---
 src/nspawn/nspawn.c | 98 ++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 75 insertions(+), 23 deletions(-)

diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 0d8d199..59efa53 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -2557,26 +2557,53 @@ static int setup_ipvlan(pid_t pid) {
         return 0;
 }
 
+/* Blocks (with EPERM) every syscall in blacklist unless capability is set in arg_retain.
+ * Returns 0 on success, or the negative error reported by seccomp_rule_add(). */
+static int add_seccomp_unless_capability(
+        const int *blacklist,
+        size_t blacklist_length,
+        scmp_filter_ctx seccomp,
+        int capability) {
+        /* The capability is retained for the container: add no filter for it. */
+        if (arg_retain & (1ULL << capability))
+                return 0;
+        for (size_t i = 0; i < blacklist_length; i++) {
+                int r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), blacklist[i], 0);
+                if (r == -EFAULT)
+                        continue; /* unknown syscall */
+                if (r < 0) {
+                        log_error_errno(r, "Failed to block syscall: %m");
+                        return r;
+                }
+        }
+        return 0; /* a skipped -EFAULT on the last entry must not reach the caller */
+}
+
 static int setup_seccomp(void) {
 
 #ifdef HAVE_SECCOMP
-        static const int blacklist[] = {
-                SCMP_SYS(kexec_load),
-                SCMP_SYS(open_by_handle_at),
+        static const int sysrawio_blacklist[] = {
                 SCMP_SYS(iopl),
                 SCMP_SYS(ioperm),
+        };
+
+        static const int sysboot_blacklist[] = {
+                SCMP_SYS(kexec_load),
+        };
+
+        static const int sysadmin_blacklist[] = {
                 SCMP_SYS(swapon),
                 SCMP_SYS(swapoff),
+                SCMP_SYS(open_by_handle_at),
         };
 
-        static const int kmod_blacklist[] = {
+        static const int sysmodule_blacklist[] = {
                 SCMP_SYS(init_module),
                 SCMP_SYS(finit_module),
                 SCMP_SYS(delete_module),
         };
 
         scmp_filter_ctx seccomp;
-        unsigned i;
         int r;
 
         seccomp = seccomp_init(SCMP_ACT_ALLOW);
@@ -2589,28 +2616,53 @@ static int setup_seccomp(void) {
                 goto finish;
         }
 
-        for (i = 0; i < ELEMENTSOF(blacklist); i++) {
-                r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), blacklist[i], 0);
-                if (r == -EFAULT)
-                        continue; /* unknown syscall */
-                if (r < 0) {
-                        log_error_errno(r, "Failed to block syscall: %m");
-                        goto finish;
-                }
+        /* If the CAP_SYS_RAWIO capability is not requested,
+         * then block iopl and ioperm */
+        r = add_seccomp_unless_capability(
+                sysrawio_blacklist,
+                ELEMENTSOF(sysrawio_blacklist),
+                seccomp,
+                CAP_SYS_RAWIO
+        );
+
+        if (r < 0) {
+                goto finish;
+        }
+
+        /* If the CAP_SYS_BOOT capability is not requested then
+         * we'll block kexec syscall too */
+        r = add_seccomp_unless_capability(
+                sysboot_blacklist,
+                ELEMENTSOF(sysboot_blacklist),
+                seccomp,
+                CAP_SYS_BOOT
+        );
+        if (r < 0) {
+                goto finish;
+        }
+
+        /* If the CAP_SYS_ADMIN capability is not requested then
+         * we'll block use of swap and open_by_handle_at */
+        r = add_seccomp_unless_capability(
+                sysadmin_blacklist,
+                ELEMENTSOF(sysadmin_blacklist),
+                seccomp,
+                CAP_SYS_ADMIN
+        );
+        if (r < 0) {
+                goto finish;
         }
 
         /* If the CAP_SYS_MODULE capability is not requested then
          * we'll block the kmod syscalls too */
-        if (!(arg_retain & (1ULL << CAP_SYS_MODULE))) {
-                for (i = 0; i < ELEMENTSOF(kmod_blacklist); i++) {
-                        r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), kmod_blacklist[i], 0);
-                        if (r == -EFAULT)
-                                continue; /* unknown syscall */
-                        if (r < 0) {
-                                log_error_errno(r, "Failed to block syscall: %m");
-                                goto finish;
-                        }
-                }
+        r = add_seccomp_unless_capability(
+                sysmodule_blacklist,
+                ELEMENTSOF(sysmodule_blacklist),
+                seccomp,
+                CAP_SYS_MODULE
+        );
+        if (r < 0) {
+                goto finish;
         }
 
         /*
