This patch changes syscall stack implementation to use mmap/munmap/mprotect combination instead of malloc/free for large syscall stack. This makes it possible to add guard page to detect stack overflow scenarios. Lastly new implementation also detects if tiny stack is not overflows when executing logic to setup large stack. Tiny stack had to be increased to 4096 bytes to accomodate mmap call.
Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com> --- arch/x64/arch-switch.hh | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/arch/x64/arch-switch.hh b/arch/x64/arch-switch.hh index e3769540..6655f9d2 100644 --- a/arch/x64/arch-switch.hh +++ b/arch/x64/arch-switch.hh @@ -11,6 +11,7 @@ #include "msr.hh" #include <osv/barrier.hh> #include <string.h> +#include <sys/mman.h> // // The last 16 bytes of the syscall stack are reserved for - @@ -20,14 +21,15 @@ // // The tiny stack has to be large enough to allow for execution of // thread::setup_large_syscall_stack() that allocates and sets up -// large syscall stack. It was measured that as of this writing -// setup_large_syscall_stack() needs a little over 600 bytes of stack -// to properly operate. This makes 1024 bytes to be an adequate size -// of tiny stack. +// large syscall stack. It is assumed that single page (4096 bytes) +// should be enough for this purpose. In any case we put a canary +// value in the bottom of tiny stack to verify if it was not overflown. +// // All application threads pre-allocate tiny syscall stack so there // is a tiny penalty with this solution. -#define TINY_SYSCALL_STACK_SIZE 1024 +#define TINY_SYSCALL_STACK_SIZE PAGE_SIZE #define TINY_SYSCALL_STACK_DEPTH (TINY_SYSCALL_STACK_SIZE - SYSCALL_STACK_RESERVED_SPACE_SIZE) +#define TINY_SYSCALL_STACK_CANARY (0xdeadbeafdeadbeaf) // // The large syscall stack is setup and switched to on first // execution of SYSCALL instruction for given application thread. @@ -188,6 +190,9 @@ void thread::setup_tcb() // OSv syscall stack type indicator and extra 8 bytes to make it 16-bytes aligned _tcb->syscall_stack_top = tiny_syscall_stack_begin + TINY_SYSCALL_STACK_DEPTH; SET_SYSCALL_STACK_TYPE_INDICATOR(TINY_SYSCALL_STACK_INDICATOR); + // + // Set canary value at the bottom of the TINY stack + *((long*)(tiny_syscall_stack_begin)) = TINY_SYSCALL_STACK_CANARY; } else { _tcb->syscall_stack_top = 0; @@ -199,11 +204,17 @@ void thread::setup_large_syscall_stack() assert(is_app()); assert(GET_SYSCALL_STACK_TYPE_INDICATOR() == TINY_SYSCALL_STACK_INDICATOR); // + // Check canary (a little bit paranoid) + void* tiny_syscall_stack_begin = _tcb->syscall_stack_top - TINY_SYSCALL_STACK_DEPTH; + assert(TINY_SYSCALL_STACK_CANARY == *((unsigned long*)tiny_syscall_stack_begin)); + // // Allocate LARGE syscall stack - void* large_syscall_stack_begin = malloc(LARGE_SYSCALL_STACK_SIZE); + void* large_syscall_stack_begin = mmap(NULL, LARGE_SYSCALL_STACK_SIZE, + PROT_READ|PROT_WRITE|PROT_EXEC, MAP_POPULATE|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + assert(large_syscall_stack_begin != MAP_FAILED ); void* large_syscall_stack_top = large_syscall_stack_begin + LARGE_SYSCALL_STACK_DEPTH; // - // Copy all of the tiny stack to the are of last 1024 bytes of large stack. + // Copy all of the tiny stack to the area of last 4096 bytes of large stack. // This way we do not have to pop and push the same registers to be saved again. // Also the caller stack pointer is also copied. // We could have copied only last 128 (registers) + 16 bytes (2 fields) instead @@ -217,12 +228,13 @@ void thread::setup_large_syscall_stack() // that we can deallocate it in free_tiny_syscall_stack *((void**)large_syscall_stack_begin) = tiny_syscall_stack_top - TINY_SYSCALL_STACK_DEPTH; // - // Set canary value (0xDEADBEAFDEADBEAF) under bottom + 8 of LARGE stack - *((long*)(large_syscall_stack_begin + 8)) = 0xdeadbeafdeadbeaf; - // // Switch syscall stack address value in TCB to the top of the LARGE one _tcb->syscall_stack_top = large_syscall_stack_top; SET_SYSCALL_STACK_TYPE_INDICATOR(LARGE_SYSCALL_STACK_INDICATOR); + assert(0 == mprotect(large_syscall_stack_begin, 4096, PROT_READ)); + // + // Check canary of tiny stack to verify it was not overflown + assert(TINY_SYSCALL_STACK_CANARY == *((unsigned long*)tiny_syscall_stack_begin)); } void thread::free_tiny_syscall_stack() @@ -249,10 +261,10 @@ void thread::free_tcb() } if (_tcb->syscall_stack_top) { - void* syscall_stack_begin = GET_SYSCALL_STACK_TYPE_INDICATOR() == TINY_SYSCALL_STACK_INDICATOR ? - _tcb->syscall_stack_top - TINY_SYSCALL_STACK_DEPTH : - _tcb->syscall_stack_top - LARGE_SYSCALL_STACK_DEPTH; - free(syscall_stack_begin); + if(GET_SYSCALL_STACK_TYPE_INDICATOR() == TINY_SYSCALL_STACK_INDICATOR) + free(_tcb->syscall_stack_top - TINY_SYSCALL_STACK_DEPTH); + else + assert(0 == munmap(_tcb->syscall_stack_top - LARGE_SYSCALL_STACK_DEPTH, LARGE_SYSCALL_STACK_SIZE)); } } -- 2.17.1 -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+unsubscr...@googlegroups.com. For more options, visit https://groups.google.com/d/optout.