From: Marc-André Lureau <marcandre.lur...@redhat.com> When using "-seccomp on", the seccomp policy is only applied to the main thread, the vcpu worker thread and other worker threads created after seccomp policy is applied; the seccomp policy is not applied to e.g. the RCU thread because it is created before the seccomp policy is applied and SECCOMP_FILTER_FLAG_TSYNC isn't used.
This can be verified with: for task in /proc/`pidof qemu`/task/*; do cat $task/status | grep Secc ; done Seccomp: 2 Seccomp: 0 Seccomp: 0 Seccomp: 2 Seccomp: 2 Seccomp: 2 Starting with libseccomp 2.2.0 and kernel >= 3.17, we can use seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1) to update the policy on all threads. Do it by default if possible, warn if not possible. Add an option to set the tsync behaviour explicitly. Note: we can't bump libseccomp to 2.2.0 since it's not available in Debian oldstable (2.1.0). Signed-off-by: Marc-André Lureau <marcandre.lur...@redhat.com> Acked-by: Eduardo Otubo <ot...@redhat.com> --- qemu-options.hx | 2 ++ qemu-seccomp.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/qemu-options.hx b/qemu-options.hx index 5515dfaba5..dafacb60c6 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -3864,6 +3864,8 @@ Disable set*uid|gid system calls Disable *fork and execve @item resourcecontrol=@var{string} Disable process affinity and schedular priority +@item tsync=@var{bool} +Apply seccomp filter to all threads (default is auto, and will warn if fail) @end table ETEXI diff --git a/qemu-seccomp.c b/qemu-seccomp.c index f0c833f3ca..aa23eae970 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -119,6 +119,45 @@ qemu_seccomp(unsigned int operation, unsigned int flags, void *args) #endif } +static bool qemu_seccomp_syscall_check(void) +{ + int rc; + + /* + * this is an invalid call because the second argument is non-zero, but + * depending on the errno value of ENOSYS or EINVAL we can guess if the + * seccomp() syscal is supported or not + */ + rc = qemu_seccomp(SECCOMP_SET_MODE_STRICT, 1, NULL); + if (rc < 0 && errno == EINVAL) { + return true; + } + + return false; +} + +static bool qemu_seccomp_get_default_tsync(void) +{ + bool tsync = true; + + /* TSYNC support was added with the syscall */ + if (!qemu_seccomp_syscall_check()) { + error_report("The host kernel doesn't 
support seccomp TSYNC!"); + tsync = false; + } + +#if !(SCMP_VER_MAJOR >= 2 && SCMP_VER_MINOR >= 2) + error_report("libseccomp is too old to support TSYNC!"); + tsync = false; +#endif + + if (!tsync) { + error_report("Only the main thread will be filtered by seccomp!"); + } + + return tsync; +} + static uint32_t qemu_seccomp_get_kill_action(void) { #if defined(SECCOMP_GET_ACTION_AVAIL) && defined(SCMP_ACT_KILL_PROCESS) && \ @@ -136,7 +175,7 @@ static uint32_t qemu_seccomp_get_kill_action(void) } -static int seccomp_start(uint32_t seccomp_opts) +static int seccomp_start(uint32_t seccomp_opts, bool tsync) { int rc = 0; unsigned int i = 0; @@ -149,6 +188,17 @@ static int seccomp_start(uint32_t seccomp_opts) goto seccomp_return; } + if (tsync) { +#if SCMP_VER_MAJOR >= 2 && SCMP_VER_MINOR >= 2 + rc = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1); +#else + rc = -1; +#endif + if (rc != 0) { + goto seccomp_return; + } + } + for (i = 0; i < ARRAY_SIZE(blacklist); i++) { if (!(seccomp_opts & blacklist[i].set)) { continue; @@ -175,6 +225,13 @@ int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT | QEMU_SECCOMP_SET_OBSOLETE; const char *value = NULL; + bool tsync; + + if (qemu_opt_get(opts, "tsync")) { + tsync = qemu_opt_get_bool(opts, "tsync", true); + } else { + tsync = qemu_seccomp_get_default_tsync(); + } value = qemu_opt_get(opts, "obsolete"); if (value) { @@ -236,7 +293,7 @@ int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp) } } - if (seccomp_start(seccomp_opts) < 0) { + if (seccomp_start(seccomp_opts, tsync) < 0) { error_report("failed to install seccomp syscall filter " "in the kernel"); return -1; @@ -271,6 +328,10 @@ static QemuOptsList qemu_sandbox_opts = { .name = "resourcecontrol", .type = QEMU_OPT_STRING, }, + { + .name = "tsync", + .type = QEMU_OPT_BOOL, + }, { /* end of list */ } }, }; -- 2.17.1