Include safefetch and hook its caching strategy into the syscall entry and
exit path to protect against time-of-check to time-of-use (TOCTOU) bugs.
---
 arch/x86/entry/syscall_64.c | 76 +++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index b6e68ea98b83..0d5665e096a6 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -20,6 +20,30 @@
 #undef __SYSCALL_NORETURN
 #define __SYSCALL_NORETURN __SYSCALL
 
+#ifdef CONFIG_SAFEFETCH
+#include <linux/safefetch.h>
+#include <linux/region_allocator.h>
+#include <linux/mem_range.h>
+#include <linux/safefetch_static_keys.h>
+#ifdef SAFEFETCH_WHITELISTING
+#warning "Using DFCACHER whitelisting"
+static noinline void should_whitelist(unsigned long syscall_nr)
+{
+	switch (syscall_nr) { /* syscalls for which current is flagged as whitelisted */
+	case __NR_futex:
+	case __NR_execve:
+	case __NR_writev:
+	case __NR_pwritev2:
+	case __NR_pwrite64:
+	case __NR_write:
+		current->df_prot_struct_head.is_whitelisted = 1;
+		return;
+	}
+	current->df_prot_struct_head.is_whitelisted = 0; /* every other syscall number */
+}
+#endif /* SAFEFETCH_WHITELISTING */
+#endif /* CONFIG_SAFEFETCH */
+
 /*
  * The sys_call_table[] is no longer used for system calls, but
  * kernel/trace/trace_syscalls.c still wants to know the system
@@ -87,8 +111,46 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
 __visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
 {
 	add_random_kstack_offset();
+	// If an interrupt that uses current runs before the next syscall, we enter
+	// this syscall with the mem_range already initialized. We could choose to
+	// clean this info (shrink_region), or simply trust that the interrupt did
+	// not fetch something nasty and run the next syscall on the interrupt state
+	// (seen mostly for sigaction calls, when IPIs save the signal frame prior to
+	// executing a sigaction call), or clear the state on irq end (might slow
+	// down irqs, so avoid this). The code below cleans it on syscall entry.
+	// NOTE(review): runs before syscall_enter_from_user_mode() in noinstr code - confirm.
+#if defined(CONFIG_SAFEFETCH)
+	IF_SAFEFETCH_STATIC_BRANCH_UNLIKELY_WRAPPER(safefetch_hooks_key) {
+		if (unlikely(SAFEFETCH_MEM_RANGE_INIT_FLAG)) {
+			// An IPI probably sent us a signal, and the signal
+			// enabled the defense in interrupt context. Reset
+			// the dfcache interrupt state.
+#ifndef SAFEFETCH_DEBUG
+			// In debug mode the range is reset in
+			// df_debug_syscall_entry instead.
+			SAFEFETCH_RESET_MEM_RANGE();
+#endif /* !SAFEFETCH_DEBUG */
+			shrink_region(DF_CUR_STORAGE_REGION_ALLOCATOR);
+			shrink_region(DF_CUR_METADATA_REGION_ALLOCATOR);
+		}
+	}
+#endif /* CONFIG_SAFEFETCH */
+
+#ifdef SAFEFETCH_MEASURE_DEFENSE
+	// Measurement only: run this without the static key, otherwise we get
+	// into nasty scenarios if this initialization step is missed.
+	df_init_measure_structs(current);
+#endif /* SAFEFETCH_MEASURE_DEFENSE */
 	nr = syscall_enter_from_user_mode(regs, nr);
+#if defined(CONFIG_SAFEFETCH) && defined(SAFEFETCH_WHITELISTING)
+	should_whitelist(nr); /* NOTE(review): called before instrumentation_begin() - confirm */
+#endif
+#if defined(CONFIG_SAFEFETCH) && defined(SAFEFETCH_DEBUG)
+	IF_SAFEFETCH_STATIC_BRANCH_UNLIKELY_WRAPPER(safefetch_hooks_key) {
+		df_debug_syscall_entry(nr, regs);
+	}
+#endif
 
 	instrumentation_begin();
 
 	if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
@@ -99,6 +161,20 @@ __visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
 	instrumentation_end();
 	syscall_exit_to_user_mode(regs);
 
+#ifdef CONFIG_SAFEFETCH
+	// Note: rseq regions and irqs might still execute in
+	// syscall_exit_to_user_mode(), so delay resetting the regions until after it.
+	IF_SAFEFETCH_STATIC_BRANCH_UNLIKELY_WRAPPER(safefetch_hooks_key) {
+#ifdef SAFEFETCH_DEBUG
+		df_debug_syscall_exit();
+#endif
+#ifdef SAFEFETCH_MEASURE_DEFENSE
+		df_destroy_measure_structs(); /* NOTE(review): df_init_measure_structs() is not key-gated - confirm pairing */
+#endif
+		reset_regions();
+	}
+#endif /* CONFIG_SAFEFETCH */
+
 /*
  * Check that the register state is valid for using SYSRET to exit
  * to userspace. Otherwise use the slower but fully capable IRET
-- 
2.25.1