Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters
On Thu, Dec 6, 2018 at 10:26 AM David Abdurachmanov wrote: > > On Thu, Dec 6, 2018 at 5:52 PM Kees Cook wrote: > > > > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov > > wrote: > > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > > > Can you add support to tools/testing/selftests/seccomp/seccomp_bpf.c > > as well? That selftest finds a lot of weird corner-cases... > > I hate it locally and will include in v2. I hate it too. ;) But seriously (reading through the "have" typo), that's awesome. Thanks! > The results see fine (tested in QEMU). Great! -- Kees Cook
Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters
On Thu, Dec 6, 2018 at 10:26 AM David Abdurachmanov wrote: > > On Thu, Dec 6, 2018 at 5:52 PM Kees Cook wrote: > > > > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov > > wrote: > > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > > > Can you add support to tools/testing/selftests/seccomp/seccomp_bpf.c > > as well? That selftest finds a lot of weird corner-cases... > > I hate it locally and will include in v2. I hate it too. ;) But seriously (reading through the "have" typo), that's awesome. Thanks! > The results see fine (tested in QEMU). Great! -- Kees Cook
Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters
On Thu, Dec 6, 2018 at 5:52 PM Kees Cook wrote: > > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov > wrote: > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > Can you add support to tools/testing/selftests/seccomp/seccomp_bpf.c > as well? That selftest finds a lot of weird corner-cases... I hate it locally and will include in v2. The results see fine (tested in QEMU). TAP version 13 selftests: seccomp: seccomp_bpf [==] Running 64 tests from 1 test cases. [ RUN ] global.mode_strict_support [ OK ] global.mode_strict_support [ RUN ] global.mode_strict_cannot_call_prctl [ OK ] global.mode_strict_cannot_call_prctl [ RUN ] global.no_new_privs_support [ OK ] global.no_new_privs_support [ RUN ] global.mode_filter_support [ OK ] global.mode_filter_support [ RUN ] global.mode_filter_without_nnp [ OK ] global.mode_filter_without_nnp [ RUN ] global.filter_size_limits [ OK ] global.filter_size_limits [ RUN ] global.filter_chain_limits [ OK ] global.filter_chain_limits [ RUN ] global.mode_filter_cannot_move_to_strict [ OK ] global.mode_filter_cannot_move_to_strict [ RUN ] global.mode_filter_get_seccomp [ OK ] global.mode_filter_get_seccomp [ RUN ] global.ALLOW_all [ OK ] global.ALLOW_all [ RUN ] global.empty_prog [ OK ] global.empty_prog [ RUN ] global.log_all [ OK ] global.log_all [ RUN ] global.unknown_ret_is_kill_inside [ OK ] global.unknown_ret_is_kill_inside [ RUN ] global.unknown_ret_is_kill_above_allow [ OK ] global.unknown_ret_is_kill_above_allow [ RUN ] global.KILL_all [ OK ] global.KILL_all [ RUN ] global.KILL_one [ OK ] global.KILL_one [ RUN ] global.KILL_one_arg_one [ OK ] global.KILL_one_arg_one [ RUN ] global.KILL_one_arg_six [ OK ] global.KILL_one_arg_six [ RUN ] global.KILL_thread [ OK ] global.KILL_thread [ RUN ] global.KILL_process [ OK ] global.KILL_process [ RUN ] global.arg_out_of_range [ OK ] global.arg_out_of_range [ RUN ] global.ERRNO_valid [ OK ] global.ERRNO_valid [ RUN ] global.ERRNO_zero [ OK ] global.ERRNO_zero [ RUN ] 
global.ERRNO_capped [ OK ] global.ERRNO_capped [ RUN ] global.ERRNO_order [ OK ] global.ERRNO_order [ RUN ] TRAP.dfl [ OK ] TRAP.dfl [ RUN ] TRAP.ign [ OK ] TRAP.ign [ RUN ] TRAP.handler [ OK ] TRAP.handler [ RUN ] precedence.allow_ok [ OK ] precedence.allow_ok [ RUN ] precedence.kill_is_highest [ OK ] precedence.kill_is_highest [ RUN ] precedence.kill_is_highest_in_any_order [ OK ] precedence.kill_is_highest_in_any_order [ RUN ] precedence.trap_is_second [ OK ] precedence.trap_is_second [ RUN ] precedence.trap_is_second_in_any_order [ OK ] precedence.trap_is_second_in_any_order [ RUN ] precedence.errno_is_third [ OK ] precedence.errno_is_third [ RUN ] precedence.errno_is_third_in_any_order [ OK ] precedence.errno_is_third_in_any_order [ RUN ] precedence.trace_is_fourth [ OK ] precedence.trace_is_fourth [ RUN ] precedence.trace_is_fourth_in_any_order [ OK ] precedence.trace_is_fourth_in_any_order [ RUN ] precedence.log_is_fifth [ OK ] precedence.log_is_fifth [ RUN ] precedence.log_is_fifth_in_any_order [ OK ] precedence.log_is_fifth_in_any_order [ RUN ] TRACE_poke.read_has_side_effects [ OK ] TRACE_poke.read_has_side_effects [ RUN ] TRACE_poke.getpid_runs_normally [ OK ] TRACE_poke.getpid_runs_normally [ RUN ] TRACE_syscall.ptrace_syscall_redirected [ OK ] TRACE_syscall.ptrace_syscall_redirected [ RUN ] TRACE_syscall.ptrace_syscall_dropped [ OK ] TRACE_syscall.ptrace_syscall_dropped [ RUN ] TRACE_syscall.syscall_allowed [ OK ] TRACE_syscall.syscall_allowed [ RUN ] TRACE_syscall.syscall_redirected [ OK ] TRACE_syscall.syscall_redirected [ RUN ] TRACE_syscall.syscall_dropped [ OK ] TRACE_syscall.syscall_dropped [ RUN ] TRACE_syscall.skip_after_RET_TRACE [ OK ] TRACE_syscall.skip_after_RET_TRACE [ RUN ] TRACE_syscall.kill_after_RET_TRACE [ OK ] TRACE_syscall.kill_after_RET_TRACE [ RUN ] TRACE_syscall.skip_after_ptrace [ OK ] TRACE_syscall.skip_after_ptrace [ RUN ] TRACE_syscall.kill_after_ptrace [ OK ] TRACE_syscall.kill_after_ptrace [ RUN ] global.seccomp_syscall [ 
OK ] global.seccomp_syscall [ RUN ] global.seccomp_syscall_mode_lock [ OK ] global.seccomp_syscall_mode_lock [ RUN ] global.detect_seccomp_filter_flags [ OK ] global.detect_seccomp_filter_flags [ RUN ] global.TSYNC_first [ OK ] global.TSYNC_first [ RUN ] TSYNC.siblings_fail_prctl [ OK ] TSYNC.siblings_fail_prctl [ RUN ]
Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters
On Thu, Dec 6, 2018 at 5:52 PM Kees Cook wrote: > > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov > wrote: > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > Can you add support to tools/testing/selftests/seccomp/seccomp_bpf.c > as well? That selftest finds a lot of weird corner-cases... I hate it locally and will include in v2. The results see fine (tested in QEMU). TAP version 13 selftests: seccomp: seccomp_bpf [==] Running 64 tests from 1 test cases. [ RUN ] global.mode_strict_support [ OK ] global.mode_strict_support [ RUN ] global.mode_strict_cannot_call_prctl [ OK ] global.mode_strict_cannot_call_prctl [ RUN ] global.no_new_privs_support [ OK ] global.no_new_privs_support [ RUN ] global.mode_filter_support [ OK ] global.mode_filter_support [ RUN ] global.mode_filter_without_nnp [ OK ] global.mode_filter_without_nnp [ RUN ] global.filter_size_limits [ OK ] global.filter_size_limits [ RUN ] global.filter_chain_limits [ OK ] global.filter_chain_limits [ RUN ] global.mode_filter_cannot_move_to_strict [ OK ] global.mode_filter_cannot_move_to_strict [ RUN ] global.mode_filter_get_seccomp [ OK ] global.mode_filter_get_seccomp [ RUN ] global.ALLOW_all [ OK ] global.ALLOW_all [ RUN ] global.empty_prog [ OK ] global.empty_prog [ RUN ] global.log_all [ OK ] global.log_all [ RUN ] global.unknown_ret_is_kill_inside [ OK ] global.unknown_ret_is_kill_inside [ RUN ] global.unknown_ret_is_kill_above_allow [ OK ] global.unknown_ret_is_kill_above_allow [ RUN ] global.KILL_all [ OK ] global.KILL_all [ RUN ] global.KILL_one [ OK ] global.KILL_one [ RUN ] global.KILL_one_arg_one [ OK ] global.KILL_one_arg_one [ RUN ] global.KILL_one_arg_six [ OK ] global.KILL_one_arg_six [ RUN ] global.KILL_thread [ OK ] global.KILL_thread [ RUN ] global.KILL_process [ OK ] global.KILL_process [ RUN ] global.arg_out_of_range [ OK ] global.arg_out_of_range [ RUN ] global.ERRNO_valid [ OK ] global.ERRNO_valid [ RUN ] global.ERRNO_zero [ OK ] global.ERRNO_zero [ RUN ] 
global.ERRNO_capped [ OK ] global.ERRNO_capped [ RUN ] global.ERRNO_order [ OK ] global.ERRNO_order [ RUN ] TRAP.dfl [ OK ] TRAP.dfl [ RUN ] TRAP.ign [ OK ] TRAP.ign [ RUN ] TRAP.handler [ OK ] TRAP.handler [ RUN ] precedence.allow_ok [ OK ] precedence.allow_ok [ RUN ] precedence.kill_is_highest [ OK ] precedence.kill_is_highest [ RUN ] precedence.kill_is_highest_in_any_order [ OK ] precedence.kill_is_highest_in_any_order [ RUN ] precedence.trap_is_second [ OK ] precedence.trap_is_second [ RUN ] precedence.trap_is_second_in_any_order [ OK ] precedence.trap_is_second_in_any_order [ RUN ] precedence.errno_is_third [ OK ] precedence.errno_is_third [ RUN ] precedence.errno_is_third_in_any_order [ OK ] precedence.errno_is_third_in_any_order [ RUN ] precedence.trace_is_fourth [ OK ] precedence.trace_is_fourth [ RUN ] precedence.trace_is_fourth_in_any_order [ OK ] precedence.trace_is_fourth_in_any_order [ RUN ] precedence.log_is_fifth [ OK ] precedence.log_is_fifth [ RUN ] precedence.log_is_fifth_in_any_order [ OK ] precedence.log_is_fifth_in_any_order [ RUN ] TRACE_poke.read_has_side_effects [ OK ] TRACE_poke.read_has_side_effects [ RUN ] TRACE_poke.getpid_runs_normally [ OK ] TRACE_poke.getpid_runs_normally [ RUN ] TRACE_syscall.ptrace_syscall_redirected [ OK ] TRACE_syscall.ptrace_syscall_redirected [ RUN ] TRACE_syscall.ptrace_syscall_dropped [ OK ] TRACE_syscall.ptrace_syscall_dropped [ RUN ] TRACE_syscall.syscall_allowed [ OK ] TRACE_syscall.syscall_allowed [ RUN ] TRACE_syscall.syscall_redirected [ OK ] TRACE_syscall.syscall_redirected [ RUN ] TRACE_syscall.syscall_dropped [ OK ] TRACE_syscall.syscall_dropped [ RUN ] TRACE_syscall.skip_after_RET_TRACE [ OK ] TRACE_syscall.skip_after_RET_TRACE [ RUN ] TRACE_syscall.kill_after_RET_TRACE [ OK ] TRACE_syscall.kill_after_RET_TRACE [ RUN ] TRACE_syscall.skip_after_ptrace [ OK ] TRACE_syscall.skip_after_ptrace [ RUN ] TRACE_syscall.kill_after_ptrace [ OK ] TRACE_syscall.kill_after_ptrace [ RUN ] global.seccomp_syscall [ 
OK ] global.seccomp_syscall [ RUN ] global.seccomp_syscall_mode_lock [ OK ] global.seccomp_syscall_mode_lock [ RUN ] global.detect_seccomp_filter_flags [ OK ] global.detect_seccomp_filter_flags [ RUN ] global.TSYNC_first [ OK ] global.TSYNC_first [ RUN ] TSYNC.siblings_fail_prctl [ OK ] TSYNC.siblings_fail_prctl [ RUN ]
Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters
On Thu, Dec 6, 2018 at 6:07 PM Kees Cook wrote: > > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov > wrote: > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > I built this against linux-next but it's missing seccomp.h. Was that > accidentally left out of the commit? > > > CC arch/riscv/kernel/asm-offsets.s > In file included from ./include/linux/sched.h:21:0, > from arch/riscv/kernel/asm-offsets.c:18: > ./include/linux/seccomp.h:14:10: fatal error: asm/seccomp.h: No such > file or directory > #include <asm/seccomp.h> > ^~~ > I forgot to add it... Will fix it. david
Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters
On Thu, Dec 6, 2018 at 6:07 PM Kees Cook wrote: > > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov > wrote: > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > I built this against linux-next but it's missing seccomp.h. Was that > accidentally left out of the commit? > > > CC arch/riscv/kernel/asm-offsets.s > In file included from ./include/linux/sched.h:21:0, > from arch/riscv/kernel/asm-offsets.c:18: > ./include/linux/seccomp.h:14:10: fatal error: asm/seccomp.h: No such > file or directory > #include <asm/seccomp.h> > ^~~ > I forgot to add it... Will fix it. david
Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters
On Thu, Dec 6, 2018 at 5:52 PM Kees Cook wrote: > > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov > wrote: > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > Can you add support to tools/testing/selftests/seccomp/seccomp_bpf.c > as well? That selftest finds a lot of weird corner-cases... > > > diff --git a/arch/riscv/include/asm/thread_info.h > > b/arch/riscv/include/asm/thread_info.h > > index 1c9cc8389928..1fd6e4130cab 100644 > > --- a/arch/riscv/include/asm/thread_info.h > > +++ b/arch/riscv/include/asm/thread_info.h > > @@ -81,6 +81,7 @@ struct thread_info { > > #define TIF_MEMDIE 5 /* is terminating due to OOM killer > > */ > > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint > > instrumentation */ > > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ > > +#define TIF_SECCOMP8 /* syscall secure computing > > */ > > Nit: extra tab needs to be removed. Will fix it. I need to clean up my vim configuration for tab width. david
Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters
On Thu, Dec 6, 2018 at 5:52 PM Kees Cook wrote: > > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov > wrote: > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > Can you add support to tools/testing/selftests/seccomp/seccomp_bpf.c > as well? That selftest finds a lot of weird corner-cases... > > > diff --git a/arch/riscv/include/asm/thread_info.h > > b/arch/riscv/include/asm/thread_info.h > > index 1c9cc8389928..1fd6e4130cab 100644 > > --- a/arch/riscv/include/asm/thread_info.h > > +++ b/arch/riscv/include/asm/thread_info.h > > @@ -81,6 +81,7 @@ struct thread_info { > > #define TIF_MEMDIE 5 /* is terminating due to OOM killer > > */ > > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint > > instrumentation */ > > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ > > +#define TIF_SECCOMP8 /* syscall secure computing > > */ > > Nit: extra tab needs to be removed. Will fix it. I need to clean up my vim configuration for tab width. david
Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters
On Thu, Dec 6, 2018 at 5:47 PM Kees Cook wrote: > > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov > wrote: > > > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > > > Signed-off-by: David Abdurachmanov > > --- > > arch/riscv/Kconfig | 14 ++ > > arch/riscv/include/asm/thread_info.h | 5 - > > arch/riscv/kernel/entry.S| 27 +-- > > arch/riscv/kernel/ptrace.c | 8 > > 4 files changed, 51 insertions(+), 3 deletions(-) > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > index a4f48f757204..49cd8e251547 100644 > > --- a/arch/riscv/Kconfig > > +++ b/arch/riscv/Kconfig > > @@ -29,6 +29,7 @@ config RISCV > > select GENERIC_SMP_IDLE_THREAD > > select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A > > select HAVE_ARCH_AUDITSYSCALL > > + select HAVE_ARCH_SECCOMP_FILTER > > select HAVE_MEMBLOCK_NODE_MAP > > select HAVE_DMA_CONTIGUOUS > > select HAVE_FUTEX_CMPXCHG if FUTEX > > @@ -228,6 +229,19 @@ menu "Kernel features" > > > > source "kernel/Kconfig.hz" > > > > +config SECCOMP > > + bool "Enable seccomp to safely compute untrusted bytecode" > > + help > > + This kernel feature is useful for number crunching applications > > + that may need to compute untrusted bytecode during their > > + execution. By using pipes or other transports made available to > > + the process as file descriptors supporting the read/write > > + syscalls, it's possible to isolate those applications in > > + their own address space using seccomp. Once seccomp is > > + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled > > + and the task is only allowed to execute a few safe syscalls > > + defined by each seccomp mode. 
> > + > > endmenu > > > > menu "Boot options" > > diff --git a/arch/riscv/include/asm/thread_info.h > > b/arch/riscv/include/asm/thread_info.h > > index 1c9cc8389928..1fd6e4130cab 100644 > > --- a/arch/riscv/include/asm/thread_info.h > > +++ b/arch/riscv/include/asm/thread_info.h > > @@ -81,6 +81,7 @@ struct thread_info { > > #define TIF_MEMDIE 5 /* is terminating due to OOM killer > > */ > > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint > > instrumentation */ > > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ > > +#define TIF_SECCOMP8 /* syscall secure computing > > */ > > > > #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) > > #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) > > @@ -88,11 +89,13 @@ struct thread_info { > > #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) > > #define _TIF_SYSCALL_TRACEPOINT(1 << TIF_SYSCALL_TRACEPOINT) > > #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) > > +#define _TIF_SECCOMP (1 << TIF_SECCOMP) > > > > #define _TIF_WORK_MASK \ > > (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED) > > > > #define _TIF_SYSCALL_WORK \ > > - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) > > + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT \ > > +_TIF_SECCOMP ) > > > > #endif /* _ASM_RISCV_THREAD_INFO_H */ > > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > > index 355166f57205..e88ccbfa61ee 100644 > > --- a/arch/riscv/kernel/entry.S > > +++ b/arch/riscv/kernel/entry.S > > @@ -207,8 +207,25 @@ check_syscall_nr: > > /* Check to make sure we don't jump to a bogus syscall number. */ > > li t0, __NR_syscalls > > la s0, sys_ni_syscall > > - /* Syscall number held in a7 */ > > - bgeu a7, t0, 1f > > + /* > > +* The tracer can change syscall number to valid/invalid value. > > +* We use syscall_set_nr helper in syscall_trace_enter thus we > > +* cannot trust the current value in a7 and have to reload from > > +* the current task pt_regs. 
> > +*/ > > + REG_L a7, PT_A7(sp) > > + /* > > +* Syscall number held in a7. > > +* If syscall number is above allowed value, redirect to ni_syscall. > > +*/ > > + bge a7, t0, 1f > > + /* > > +* Check if syscall is rejected by tracer or seccomp, i.e., a7 == > > -1. > > +* If yes, we pretend it was executed. > > +*/ > > + li t1, -1 > > + beq a7, t1, ret_from_syscall_rejected > > + /* Call syscall */ > > la s0, sys_call_table > > slli t0, a7, RISCV_LGPTR > > add s0, s0, t0 > > @@ -219,6 +236,12 @@ check_syscall_nr: > > ret_from_syscall: > > /* Set user a0 to kernel a0 */ > > REG_S a0, PT_A0(sp) > > + /* > > +* We didn't execute the actual syscall. > > +* Seccomp already set return value for the current task pt_regs. > > +* (If it was configured with
Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters
On Thu, Dec 6, 2018 at 5:47 PM Kees Cook wrote: > > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov > wrote: > > > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > > > Signed-off-by: David Abdurachmanov > > --- > > arch/riscv/Kconfig | 14 ++ > > arch/riscv/include/asm/thread_info.h | 5 - > > arch/riscv/kernel/entry.S| 27 +-- > > arch/riscv/kernel/ptrace.c | 8 > > 4 files changed, 51 insertions(+), 3 deletions(-) > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > index a4f48f757204..49cd8e251547 100644 > > --- a/arch/riscv/Kconfig > > +++ b/arch/riscv/Kconfig > > @@ -29,6 +29,7 @@ config RISCV > > select GENERIC_SMP_IDLE_THREAD > > select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A > > select HAVE_ARCH_AUDITSYSCALL > > + select HAVE_ARCH_SECCOMP_FILTER > > select HAVE_MEMBLOCK_NODE_MAP > > select HAVE_DMA_CONTIGUOUS > > select HAVE_FUTEX_CMPXCHG if FUTEX > > @@ -228,6 +229,19 @@ menu "Kernel features" > > > > source "kernel/Kconfig.hz" > > > > +config SECCOMP > > + bool "Enable seccomp to safely compute untrusted bytecode" > > + help > > + This kernel feature is useful for number crunching applications > > + that may need to compute untrusted bytecode during their > > + execution. By using pipes or other transports made available to > > + the process as file descriptors supporting the read/write > > + syscalls, it's possible to isolate those applications in > > + their own address space using seccomp. Once seccomp is > > + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled > > + and the task is only allowed to execute a few safe syscalls > > + defined by each seccomp mode. 
> > + > > endmenu > > > > menu "Boot options" > > diff --git a/arch/riscv/include/asm/thread_info.h > > b/arch/riscv/include/asm/thread_info.h > > index 1c9cc8389928..1fd6e4130cab 100644 > > --- a/arch/riscv/include/asm/thread_info.h > > +++ b/arch/riscv/include/asm/thread_info.h > > @@ -81,6 +81,7 @@ struct thread_info { > > #define TIF_MEMDIE 5 /* is terminating due to OOM killer > > */ > > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint > > instrumentation */ > > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ > > +#define TIF_SECCOMP8 /* syscall secure computing > > */ > > > > #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) > > #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) > > @@ -88,11 +89,13 @@ struct thread_info { > > #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) > > #define _TIF_SYSCALL_TRACEPOINT(1 << TIF_SYSCALL_TRACEPOINT) > > #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) > > +#define _TIF_SECCOMP (1 << TIF_SECCOMP) > > > > #define _TIF_WORK_MASK \ > > (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED) > > > > #define _TIF_SYSCALL_WORK \ > > - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) > > + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT \ > > +_TIF_SECCOMP ) > > > > #endif /* _ASM_RISCV_THREAD_INFO_H */ > > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > > index 355166f57205..e88ccbfa61ee 100644 > > --- a/arch/riscv/kernel/entry.S > > +++ b/arch/riscv/kernel/entry.S > > @@ -207,8 +207,25 @@ check_syscall_nr: > > /* Check to make sure we don't jump to a bogus syscall number. */ > > li t0, __NR_syscalls > > la s0, sys_ni_syscall > > - /* Syscall number held in a7 */ > > - bgeu a7, t0, 1f > > + /* > > +* The tracer can change syscall number to valid/invalid value. > > +* We use syscall_set_nr helper in syscall_trace_enter thus we > > +* cannot trust the current value in a7 and have to reload from > > +* the current task pt_regs. 
> > +*/ > > + REG_L a7, PT_A7(sp) > > + /* > > +* Syscall number held in a7. > > +* If syscall number is above allowed value, redirect to ni_syscall. > > +*/ > > + bge a7, t0, 1f > > + /* > > +* Check if syscall is rejected by tracer or seccomp, i.e., a7 == > > -1. > > +* If yes, we pretend it was executed. > > +*/ > > + li t1, -1 > > + beq a7, t1, ret_from_syscall_rejected > > + /* Call syscall */ > > la s0, sys_call_table > > slli t0, a7, RISCV_LGPTR > > add s0, s0, t0 > > @@ -219,6 +236,12 @@ check_syscall_nr: > > ret_from_syscall: > > /* Set user a0 to kernel a0 */ > > REG_S a0, PT_A0(sp) > > + /* > > +* We didn't execute the actual syscall. > > +* Seccomp already set return value for the current task pt_regs. > > +* (If it was configured with
Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters
On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov wrote: > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). I built this against linux-next but it's missing seccomp.h. Was that accidentally left out of the commit? CC arch/riscv/kernel/asm-offsets.s In file included from ./include/linux/sched.h:21:0, from arch/riscv/kernel/asm-offsets.c:18: ./include/linux/seccomp.h:14:10: fatal error: asm/seccomp.h: No such file or directory #include <asm/seccomp.h> ^~~ -- Kees Cook
Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters
On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov wrote: > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). I built this against linux-next but it's missing seccomp.h. Was that accidentally left out of the commit? CC arch/riscv/kernel/asm-offsets.s In file included from ./include/linux/sched.h:21:0, from arch/riscv/kernel/asm-offsets.c:18: ./include/linux/seccomp.h:14:10: fatal error: asm/seccomp.h: No such file or directory #include <asm/seccomp.h> ^~~ -- Kees Cook
Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters
On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov wrote: > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). Can you add support to tools/testing/selftests/seccomp/seccomp_bpf.c as well? That selftest finds a lot of weird corner-cases... > diff --git a/arch/riscv/include/asm/thread_info.h > b/arch/riscv/include/asm/thread_info.h > index 1c9cc8389928..1fd6e4130cab 100644 > --- a/arch/riscv/include/asm/thread_info.h > +++ b/arch/riscv/include/asm/thread_info.h > @@ -81,6 +81,7 @@ struct thread_info { > #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint > instrumentation */ > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ > +#define TIF_SECCOMP8 /* syscall secure computing */ Nit: extra tab needs to be removed. -- Kees Cook
Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters
On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov wrote: > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). Can you add support to tools/testing/selftests/seccomp/seccomp_bpf.c as well? That selftest finds a lot of weird corner-cases... > diff --git a/arch/riscv/include/asm/thread_info.h > b/arch/riscv/include/asm/thread_info.h > index 1c9cc8389928..1fd6e4130cab 100644 > --- a/arch/riscv/include/asm/thread_info.h > +++ b/arch/riscv/include/asm/thread_info.h > @@ -81,6 +81,7 @@ struct thread_info { > #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint > instrumentation */ > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ > +#define TIF_SECCOMP8 /* syscall secure computing */ Nit: extra tab needs to be removed. -- Kees Cook
Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters
On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov wrote: > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > Signed-off-by: David Abdurachmanov > --- > arch/riscv/Kconfig | 14 ++ > arch/riscv/include/asm/thread_info.h | 5 - > arch/riscv/kernel/entry.S| 27 +-- > arch/riscv/kernel/ptrace.c | 8 > 4 files changed, 51 insertions(+), 3 deletions(-) > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > index a4f48f757204..49cd8e251547 100644 > --- a/arch/riscv/Kconfig > +++ b/arch/riscv/Kconfig > @@ -29,6 +29,7 @@ config RISCV > select GENERIC_SMP_IDLE_THREAD > select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A > select HAVE_ARCH_AUDITSYSCALL > + select HAVE_ARCH_SECCOMP_FILTER > select HAVE_MEMBLOCK_NODE_MAP > select HAVE_DMA_CONTIGUOUS > select HAVE_FUTEX_CMPXCHG if FUTEX > @@ -228,6 +229,19 @@ menu "Kernel features" > > source "kernel/Kconfig.hz" > > +config SECCOMP > + bool "Enable seccomp to safely compute untrusted bytecode" > + help > + This kernel feature is useful for number crunching applications > + that may need to compute untrusted bytecode during their > + execution. By using pipes or other transports made available to > + the process as file descriptors supporting the read/write > + syscalls, it's possible to isolate those applications in > + their own address space using seccomp. Once seccomp is > + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled > + and the task is only allowed to execute a few safe syscalls > + defined by each seccomp mode. 
> + > endmenu > > menu "Boot options" > diff --git a/arch/riscv/include/asm/thread_info.h > b/arch/riscv/include/asm/thread_info.h > index 1c9cc8389928..1fd6e4130cab 100644 > --- a/arch/riscv/include/asm/thread_info.h > +++ b/arch/riscv/include/asm/thread_info.h > @@ -81,6 +81,7 @@ struct thread_info { > #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint > instrumentation */ > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ > +#define TIF_SECCOMP8 /* syscall secure computing */ > > #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) > #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) > @@ -88,11 +89,13 @@ struct thread_info { > #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) > #define _TIF_SYSCALL_TRACEPOINT(1 << TIF_SYSCALL_TRACEPOINT) > #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) > +#define _TIF_SECCOMP (1 << TIF_SECCOMP) > > #define _TIF_WORK_MASK \ > (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED) > > #define _TIF_SYSCALL_WORK \ > - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) > + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT \ > +_TIF_SECCOMP ) > > #endif /* _ASM_RISCV_THREAD_INFO_H */ > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > index 355166f57205..e88ccbfa61ee 100644 > --- a/arch/riscv/kernel/entry.S > +++ b/arch/riscv/kernel/entry.S > @@ -207,8 +207,25 @@ check_syscall_nr: > /* Check to make sure we don't jump to a bogus syscall number. */ > li t0, __NR_syscalls > la s0, sys_ni_syscall > - /* Syscall number held in a7 */ > - bgeu a7, t0, 1f > + /* > +* The tracer can change syscall number to valid/invalid value. > +* We use syscall_set_nr helper in syscall_trace_enter thus we > +* cannot trust the current value in a7 and have to reload from > +* the current task pt_regs. > +*/ > + REG_L a7, PT_A7(sp) > + /* > +* Syscall number held in a7. 
> +* If syscall number is above allowed value, redirect to ni_syscall. > +*/ > + bge a7, t0, 1f > + /* > +* Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1. > +* If yes, we pretend it was executed. > +*/ > + li t1, -1 > + beq a7, t1, ret_from_syscall_rejected > + /* Call syscall */ > la s0, sys_call_table > slli t0, a7, RISCV_LGPTR > add s0, s0, t0 > @@ -219,6 +236,12 @@ check_syscall_nr: > ret_from_syscall: > /* Set user a0 to kernel a0 */ > REG_S a0, PT_A0(sp) > + /* > +* We didn't execute the actual syscall. > +* Seccomp already set return value for the current task pt_regs. > +* (If it was configured with SECCOMP_RET_ERRNO/TRACE) > +*/ > +ret_from_syscall_rejected: > /* Trace syscalls, but only if requested by the user. */ > REG_L t0, TASK_TI_FLAGS(tp) > andi t0, t0, _TIF_SYSCALL_WORK > diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c > index
Re: [PATCH 1/2] riscv: add support for SECCOMP incl. filters
On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov wrote: > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > Signed-off-by: David Abdurachmanov > --- > arch/riscv/Kconfig | 14 ++ > arch/riscv/include/asm/thread_info.h | 5 - > arch/riscv/kernel/entry.S| 27 +-- > arch/riscv/kernel/ptrace.c | 8 > 4 files changed, 51 insertions(+), 3 deletions(-) > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > index a4f48f757204..49cd8e251547 100644 > --- a/arch/riscv/Kconfig > +++ b/arch/riscv/Kconfig > @@ -29,6 +29,7 @@ config RISCV > select GENERIC_SMP_IDLE_THREAD > select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A > select HAVE_ARCH_AUDITSYSCALL > + select HAVE_ARCH_SECCOMP_FILTER > select HAVE_MEMBLOCK_NODE_MAP > select HAVE_DMA_CONTIGUOUS > select HAVE_FUTEX_CMPXCHG if FUTEX > @@ -228,6 +229,19 @@ menu "Kernel features" > > source "kernel/Kconfig.hz" > > +config SECCOMP > + bool "Enable seccomp to safely compute untrusted bytecode" > + help > + This kernel feature is useful for number crunching applications > + that may need to compute untrusted bytecode during their > + execution. By using pipes or other transports made available to > + the process as file descriptors supporting the read/write > + syscalls, it's possible to isolate those applications in > + their own address space using seccomp. Once seccomp is > + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled > + and the task is only allowed to execute a few safe syscalls > + defined by each seccomp mode. 
> + > endmenu > > menu "Boot options" > diff --git a/arch/riscv/include/asm/thread_info.h > b/arch/riscv/include/asm/thread_info.h > index 1c9cc8389928..1fd6e4130cab 100644 > --- a/arch/riscv/include/asm/thread_info.h > +++ b/arch/riscv/include/asm/thread_info.h > @@ -81,6 +81,7 @@ struct thread_info { > #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint > instrumentation */ > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ > +#define TIF_SECCOMP8 /* syscall secure computing */ > > #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) > #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) > @@ -88,11 +89,13 @@ struct thread_info { > #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) > #define _TIF_SYSCALL_TRACEPOINT(1 << TIF_SYSCALL_TRACEPOINT) > #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) > +#define _TIF_SECCOMP (1 << TIF_SECCOMP) > > #define _TIF_WORK_MASK \ > (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED) > > #define _TIF_SYSCALL_WORK \ > - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) > + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT \ > +_TIF_SECCOMP ) > > #endif /* _ASM_RISCV_THREAD_INFO_H */ > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > index 355166f57205..e88ccbfa61ee 100644 > --- a/arch/riscv/kernel/entry.S > +++ b/arch/riscv/kernel/entry.S > @@ -207,8 +207,25 @@ check_syscall_nr: > /* Check to make sure we don't jump to a bogus syscall number. */ > li t0, __NR_syscalls > la s0, sys_ni_syscall > - /* Syscall number held in a7 */ > - bgeu a7, t0, 1f > + /* > +* The tracer can change syscall number to valid/invalid value. > +* We use syscall_set_nr helper in syscall_trace_enter thus we > +* cannot trust the current value in a7 and have to reload from > +* the current task pt_regs. > +*/ > + REG_L a7, PT_A7(sp) > + /* > +* Syscall number held in a7. 
> +* If syscall number is above allowed value, redirect to ni_syscall. > +*/ > + bge a7, t0, 1f > + /* > +* Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1. > +* If yes, we pretend it was executed. > +*/ > + li t1, -1 > + beq a7, t1, ret_from_syscall_rejected > + /* Call syscall */ > la s0, sys_call_table > slli t0, a7, RISCV_LGPTR > add s0, s0, t0 > @@ -219,6 +236,12 @@ check_syscall_nr: > ret_from_syscall: > /* Set user a0 to kernel a0 */ > REG_S a0, PT_A0(sp) > + /* > +* We didn't execute the actual syscall. > +* Seccomp already set return value for the current task pt_regs. > +* (If it was configured with SECCOMP_RET_ERRNO/TRACE) > +*/ > +ret_from_syscall_rejected: > /* Trace syscalls, but only if requested by the user. */ > REG_L t0, TASK_TI_FLAGS(tp) > andi t0, t0, _TIF_SYSCALL_WORK > diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c > index
[PATCH 1/2] riscv: add support for SECCOMP incl. filters
The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). Signed-off-by: David Abdurachmanov --- arch/riscv/Kconfig | 14 ++ arch/riscv/include/asm/thread_info.h | 5 - arch/riscv/kernel/entry.S| 27 +-- arch/riscv/kernel/ptrace.c | 8 4 files changed, 51 insertions(+), 3 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a4f48f757204..49cd8e251547 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -29,6 +29,7 @@ config RISCV select GENERIC_SMP_IDLE_THREAD select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_SECCOMP_FILTER select HAVE_MEMBLOCK_NODE_MAP select HAVE_DMA_CONTIGUOUS select HAVE_FUTEX_CMPXCHG if FUTEX @@ -228,6 +229,19 @@ menu "Kernel features" source "kernel/Kconfig.hz" +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. 
+ endmenu menu "Boot options" diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 1c9cc8389928..1fd6e4130cab 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -81,6 +81,7 @@ struct thread_info { #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ +#define TIF_SECCOMP 8 /* syscall secure computing */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -88,11 +89,13 @@ struct thread_info { #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_WORK_MASK \ (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED) #define _TIF_SYSCALL_WORK \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \ + _TIF_SECCOMP) #endif /* _ASM_RISCV_THREAD_INFO_H */ diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 355166f57205..e88ccbfa61ee 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -207,8 +207,25 @@ check_syscall_nr: /* Check to make sure we don't jump to a bogus syscall number. */ li t0, __NR_syscalls la s0, sys_ni_syscall - /* Syscall number held in a7 */ - bgeu a7, t0, 1f + /* +* The tracer can change syscall number to valid/invalid value. +* We use syscall_set_nr helper in syscall_trace_enter thus we +* cannot trust the current value in a7 and have to reload from +* the current task pt_regs. +*/ + REG_L a7, PT_A7(sp) + /* +* Syscall number held in a7. +* If syscall number is above allowed value, redirect to ni_syscall. +*/ + bge a7, t0, 1f + /* +* Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1. 
+* If yes, we pretend it was executed. +*/ + li t1, -1 + beq a7, t1, ret_from_syscall_rejected + /* Call syscall */ la s0, sys_call_table slli t0, a7, RISCV_LGPTR add s0, s0, t0 @@ -219,6 +236,12 @@ check_syscall_nr: ret_from_syscall: /* Set user a0 to kernel a0 */ REG_S a0, PT_A0(sp) + /* +* We didn't execute the actual syscall. +* Seccomp already set return value for the current task pt_regs. +* (If it was configured with SECCOMP_RET_ERRNO/TRACE) +*/ +ret_from_syscall_rejected: /* Trace syscalls, but only if requested by the user. */ REG_L t0, TASK_TI_FLAGS(tp) andi t0, t0, _TIF_SYSCALL_WORK diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index c1b51539c3e2..598e48b8ca2b 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -160,6 +160,14 @@ void do_syscall_trace_enter(struct pt_regs *regs) if (tracehook_report_syscall_entry(regs)) syscall_set_nr(current, regs,
[PATCH 1/2] riscv: add support for SECCOMP incl. filters
The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). Signed-off-by: David Abdurachmanov --- arch/riscv/Kconfig | 14 ++ arch/riscv/include/asm/thread_info.h | 5 - arch/riscv/kernel/entry.S| 27 +-- arch/riscv/kernel/ptrace.c | 8 4 files changed, 51 insertions(+), 3 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a4f48f757204..49cd8e251547 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -29,6 +29,7 @@ config RISCV select GENERIC_SMP_IDLE_THREAD select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_SECCOMP_FILTER select HAVE_MEMBLOCK_NODE_MAP select HAVE_DMA_CONTIGUOUS select HAVE_FUTEX_CMPXCHG if FUTEX @@ -228,6 +229,19 @@ menu "Kernel features" source "kernel/Kconfig.hz" +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. 
+ endmenu menu "Boot options" diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 1c9cc8389928..1fd6e4130cab 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -81,6 +81,7 @@ struct thread_info { #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ +#define TIF_SECCOMP 8 /* syscall secure computing */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -88,11 +89,13 @@ struct thread_info { #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_WORK_MASK \ (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED) #define _TIF_SYSCALL_WORK \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \ + _TIF_SECCOMP) #endif /* _ASM_RISCV_THREAD_INFO_H */ diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 355166f57205..e88ccbfa61ee 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -207,8 +207,25 @@ check_syscall_nr: /* Check to make sure we don't jump to a bogus syscall number. */ li t0, __NR_syscalls la s0, sys_ni_syscall - /* Syscall number held in a7 */ - bgeu a7, t0, 1f + /* +* The tracer can change syscall number to valid/invalid value. +* We use syscall_set_nr helper in syscall_trace_enter thus we +* cannot trust the current value in a7 and have to reload from +* the current task pt_regs. +*/ + REG_L a7, PT_A7(sp) + /* +* Syscall number held in a7. +* If syscall number is above allowed value, redirect to ni_syscall. +*/ + bge a7, t0, 1f + /* +* Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1. 
+* If yes, we pretend it was executed. +*/ + li t1, -1 + beq a7, t1, ret_from_syscall_rejected + /* Call syscall */ la s0, sys_call_table slli t0, a7, RISCV_LGPTR add s0, s0, t0 @@ -219,6 +236,12 @@ check_syscall_nr: ret_from_syscall: /* Set user a0 to kernel a0 */ REG_S a0, PT_A0(sp) + /* +* We didn't execute the actual syscall. +* Seccomp already set return value for the current task pt_regs. +* (If it was configured with SECCOMP_RET_ERRNO/TRACE) +*/ +ret_from_syscall_rejected: /* Trace syscalls, but only if requested by the user. */ REG_L t0, TASK_TI_FLAGS(tp) andi t0, t0, _TIF_SYSCALL_WORK diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index c1b51539c3e2..598e48b8ca2b 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -160,6 +160,14 @@ void do_syscall_trace_enter(struct pt_regs *regs) if (tracehook_report_syscall_entry(regs)) syscall_set_nr(current, regs,