From: Josh Poimboeuf <[email protected]> In preparation for using sframe to unwind user space stacks, add an sframe_find() interface for finding the sframe information associated with a given text address.
For performance, use user_read_access_begin() and the corresponding unsafe_*() accessors. Note that use of pr_debug() in uaccess-enabled regions would break noinstr validation, so there aren't any debug messages yet. That will be added in a subsequent commit. Link: https://lore.kernel.org/all/77c0d1ec143bf2a53d66c4ecb190e7e0a576fbfd.1737511963.git.jpoim...@kernel.org/ Link: https://lore.kernel.org/all/[email protected]/ [ Jens Remus: Add support for PC-relative FDE function start address. Simplify logic by using an internal SFrame FDE representation, whose FDE function start address field is an address instead of a PC-relative offset (from FDE). Rename struct sframe_fre to sframe_fre_internal to align with struct sframe_fde_internal. Cleanup includes. Fix checkpatch errors "spaces required around that ':'". ] Cc: Masami Hiramatsu <[email protected]> Cc: Mathieu Desnoyers <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Jiri Olsa <[email protected]> Cc: Arnaldo Carvalho de Melo <[email protected]> Cc: Namhyung Kim <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: Andrii Nakryiko <[email protected]> Cc: Indu Bhagat <[email protected]> Cc: "Jose E. Marchesi" <[email protected]> Cc: Beau Belgrave <[email protected]> Cc: Jens Remus <[email protected]> Cc: Linus Torvalds <[email protected]> Cc: Andrew Morton <[email protected]> Cc: Florian Weimer <[email protected]> Cc: Sam James <[email protected]> Cc: Kees Cook <[email protected]> Cc: "Carlos O'Donell" <[email protected]> Signed-off-by: Josh Poimboeuf <[email protected]> Signed-off-by: Steven Rostedt (Google) <[email protected]> Signed-off-by: Jens Remus <[email protected]> --- Notes (jremus): Changes in v12: - Simplify logic by using an internal SFrame FDE representation, whose FDE function start address field is an address instead of a PC-relative offset (from FDE). - Rename struct sframe_fre to sframe_fre_internal to align with struct sframe_fde_internal. - Add include of linux/unwind_user_types.h from "unwind_user/sframe: Add support for reading .sframe headers". - Fix checkpatch errors "spaces required around that ':'". Changes in v11: - Support for SFrame V2 PC-relative FDE function start address. include/linux/sframe.h | 6 + kernel/unwind/sframe.c | 330 ++++++++++++++++++++++++++++++++++- kernel/unwind/sframe_debug.h | 35 ++++ 3 files changed, 367 insertions(+), 4 deletions(-) create mode 100644 kernel/unwind/sframe_debug.h diff --git a/include/linux/sframe.h b/include/linux/sframe.h index 7ea6a97ed8af..9a72209696f9 100644 --- a/include/linux/sframe.h +++ b/include/linux/sframe.h @@ -3,10 +3,14 @@ #define _LINUX_SFRAME_H #include <linux/mm_types.h> +#include <linux/srcu.h> +#include <linux/unwind_user_types.h> #ifdef CONFIG_HAVE_UNWIND_USER_SFRAME struct sframe_section { + struct rcu_head rcu; + unsigned long sframe_start; unsigned long sframe_end; unsigned long text_start; @@ -27,6 +31,7 @@ extern void sframe_free_mm(struct mm_struct *mm); extern int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end, unsigned long text_start, unsigned long text_end); extern int sframe_remove_section(unsigned long sframe_addr); +extern int sframe_find(unsigned long ip, struct unwind_user_frame *frame); static inline bool current_has_sframe(void) { @@ -45,6 +50,7 @@ static inline int sframe_add_section(unsigned long sframe_start, unsigned long s return -ENOSYS; } static inline int sframe_remove_section(unsigned long sframe_addr) { return -ENOSYS; } +static inline int sframe_find(unsigned long ip, struct unwind_user_frame *frame) { return -ENOSYS; } static inline bool current_has_sframe(void) { return false; } #endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */ diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c index 149ce70e4229..d4ef825b1cbc 100644 --- a/kernel/unwind/sframe.c +++ b/kernel/unwind/sframe.c @@ -15,9 +15,322 @@ #include <linux/unwind_user_types.h> #include "sframe.h" +#include "sframe_debug.h" + +struct sframe_fde_internal { + unsigned long func_start_addr; + u32 func_size; + u32 fres_off; + u32 fres_num; + u8 info; + u8 rep_size; +}; + +struct sframe_fre_internal { + unsigned int size; + u32 ip_off; + s32 cfa_off; + s32 ra_off; + s32 fp_off; + u8 info; +}; + +DEFINE_STATIC_SRCU(sframe_srcu); + +static __always_inline unsigned char fre_type_to_size(unsigned char fre_type) +{ + if (fre_type > 2) + return 0; + return 1 << fre_type; +} + +static __always_inline unsigned char offset_size_enum_to_size(unsigned char off_size) +{ + if (off_size > 2) + return 0; + return 1 << off_size; +} + +static __always_inline int __read_fde(struct sframe_section *sec, + unsigned int fde_num, + struct sframe_fde_internal *fde) +{ + unsigned long fde_addr, func_addr; + struct sframe_fde _fde; + + fde_addr = sec->fdes_start + (fde_num * sizeof(struct sframe_fde)); + unsafe_copy_from_user(&_fde, (void __user *)fde_addr, + sizeof(struct sframe_fde), Efault); + + func_addr = fde_addr + _fde.start_addr; + if (func_addr < sec->text_start || func_addr > sec->text_end) + return -EINVAL; + + fde->func_start_addr = func_addr; + fde->func_size = _fde.func_size; + fde->fres_off = _fde.fres_off; + fde->fres_num = _fde.fres_num; + fde->info = _fde.info; + fde->rep_size = _fde.rep_size; + + return 0; + +Efault: + return -EFAULT; +} + +static __always_inline int __find_fde(struct sframe_section *sec, + unsigned long ip, + struct sframe_fde_internal *fde) +{ + unsigned long func_addr_low = 0, func_addr_high = ULONG_MAX; + struct sframe_fde __user *first, *low, *high, *found = NULL; + int ret; + + first = (void __user *)sec->fdes_start; + low = first; + high = first + sec->num_fdes - 1; + + while (low <= high) { + struct sframe_fde __user *mid; + s32 func_off; + unsigned long func_addr; + + mid = low + ((high - low) / 2); + + unsafe_get_user(func_off, (s32 __user *)mid, Efault); + func_addr = (unsigned long)mid + func_off; + + if (ip >= func_addr) { + if (func_addr < func_addr_low) + return -EFAULT; + + func_addr_low = func_addr; + + found = mid; + low = mid + 1; + } else { + if (func_addr > func_addr_high) + return -EFAULT; + + func_addr_high = func_addr; + + high = mid - 1; + } + } + + if (!found) + return -EINVAL; + + ret = __read_fde(sec, found - first, fde); + if (ret) + return ret; + + /* make sure it's not in a gap */ + if (ip < fde->func_start_addr || + ip >= fde->func_start_addr + fde->func_size) + return -EINVAL; + + return 0; + +Efault: + return -EFAULT; +} + +#define ____UNSAFE_GET_USER_INC(to, from, type, label) \ +({ \ + type __to; \ + unsafe_get_user(__to, (type __user *)from, label); \ + from += sizeof(__to); \ + to = __to; \ +}) + +#define __UNSAFE_GET_USER_INC(to, from, size, label, u_or_s) \ +({ \ + switch (size) { \ + case 1: \ + ____UNSAFE_GET_USER_INC(to, from, u_or_s##8, label); \ + break; \ + case 2: \ + ____UNSAFE_GET_USER_INC(to, from, u_or_s##16, label); \ + break; \ + case 4: \ + ____UNSAFE_GET_USER_INC(to, from, u_or_s##32, label); \ + break; \ + default: \ + return -EFAULT; \ + } \ +}) + +#define UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label) \ + __UNSAFE_GET_USER_INC(to, from, size, label, u) + +#define UNSAFE_GET_USER_SIGNED_INC(to, from, size, label) \ + __UNSAFE_GET_USER_INC(to, from, size, label, s) + +#define UNSAFE_GET_USER_INC(to, from, size, label) \ + _Generic(to, \ + u8 : UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label), \ + u16 : UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label), \ + u32 : UNSAFE_GET_USER_UNSIGNED_INC(to, from, size, label), \ + s8 : UNSAFE_GET_USER_SIGNED_INC(to, from, size, label), \ + s16 : UNSAFE_GET_USER_SIGNED_INC(to, from, size, label), \ + s32 : UNSAFE_GET_USER_SIGNED_INC(to, from, size, label)) + +static __always_inline int __read_fre(struct sframe_section *sec, + struct sframe_fde_internal *fde, + unsigned long fre_addr, + struct sframe_fre_internal *fre) +{ + unsigned char fde_type = SFRAME_FUNC_FDE_TYPE(fde->info); + unsigned char fre_type = SFRAME_FUNC_FRE_TYPE(fde->info); + unsigned char offset_count, offset_size; + s32 cfa_off, ra_off, fp_off; + unsigned long cur = fre_addr; + unsigned char addr_size; + u32 ip_off; + u8 info; + + addr_size = fre_type_to_size(fre_type); + if (!addr_size) + return -EFAULT; + + if (fre_addr + addr_size + 1 > sec->fres_end) + return -EFAULT; + + UNSAFE_GET_USER_INC(ip_off, cur, addr_size, Efault); + if (fde_type == SFRAME_FDE_TYPE_PCINC && ip_off > fde->func_size) + return -EFAULT; + + UNSAFE_GET_USER_INC(info, cur, 1, Efault); + offset_count = SFRAME_FRE_OFFSET_COUNT(info); + offset_size = offset_size_enum_to_size(SFRAME_FRE_OFFSET_SIZE(info)); + if (!offset_count || !offset_size) + return -EFAULT; + + if (cur + (offset_count * offset_size) > sec->fres_end) + return -EFAULT; + + fre->size = addr_size + 1 + (offset_count * offset_size); + + UNSAFE_GET_USER_INC(cfa_off, cur, offset_size, Efault); + offset_count--; + + ra_off = sec->ra_off; + if (!ra_off) { + if (!offset_count--) + return -EFAULT; + + UNSAFE_GET_USER_INC(ra_off, cur, offset_size, Efault); + } -#define dbg(fmt, ...) \ - pr_debug("%s (%d): " fmt, current->comm, current->pid, ##__VA_ARGS__) + fp_off = sec->fp_off; + if (!fp_off && offset_count) { + offset_count--; + UNSAFE_GET_USER_INC(fp_off, cur, offset_size, Efault); + } + + if (offset_count) + return -EFAULT; + + fre->ip_off = ip_off; + fre->cfa_off = cfa_off; + fre->ra_off = ra_off; + fre->fp_off = fp_off; + fre->info = info; + + return 0; + +Efault: + return -EFAULT; +} + +static __always_inline int __find_fre(struct sframe_section *sec, + struct sframe_fde_internal *fde, + unsigned long ip, + struct unwind_user_frame *frame) +{ + unsigned char fde_type = SFRAME_FUNC_FDE_TYPE(fde->info); + struct sframe_fre_internal *fre, *prev_fre = NULL; + struct sframe_fre_internal fres[2]; + unsigned long fre_addr; + bool which = false; + unsigned int i; + u32 ip_off; + + ip_off = ip - fde->func_start_addr; + + if (fde_type == SFRAME_FDE_TYPE_PCMASK) + ip_off %= fde->rep_size; + + fre_addr = sec->fres_start + fde->fres_off; + + for (i = 0; i < fde->fres_num; i++) { + int ret; + + /* + * Alternate between the two fre_addr[] entries for 'fre' and + * 'prev_fre'. + */ + fre = which ? fres : fres + 1; + which = !which; + + ret = __read_fre(sec, fde, fre_addr, fre); + if (ret) + return ret; + + fre_addr += fre->size; + + if (prev_fre && fre->ip_off <= prev_fre->ip_off) + return -EFAULT; + + if (fre->ip_off > ip_off) + break; + + prev_fre = fre; + } + + if (!prev_fre) + return -EINVAL; + fre = prev_fre; + + frame->cfa_off = fre->cfa_off; + frame->ra_off = fre->ra_off; + frame->fp_off = fre->fp_off; + frame->use_fp = SFRAME_FRE_CFA_BASE_REG_ID(fre->info) == SFRAME_BASE_REG_FP; + + return 0; +} + +int sframe_find(unsigned long ip, struct unwind_user_frame *frame) +{ + struct mm_struct *mm = current->mm; + struct sframe_section *sec; + struct sframe_fde_internal fde; + int ret; + + if (!mm) + return -EINVAL; + + guard(srcu)(&sframe_srcu); + + sec = mtree_load(&mm->sframe_mt, ip); + if (!sec) + return -EINVAL; + + if (!user_read_access_begin((void __user *)sec->sframe_start, + sec->sframe_end - sec->sframe_start)) + return -EFAULT; + + ret = __find_fde(sec, ip, &fde); + if (ret) + goto end; + + ret = __find_fre(sec, &fde, ip, frame); +end: + user_read_access_end(); + return ret; +} static void free_section(struct sframe_section *sec) { @@ -120,8 +433,10 @@ int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end, sec->text_end = text_end; ret = sframe_read_header(sec); - if (ret) + if (ret) { + dbg_print_header(sec); goto err_free; + } ret = mtree_insert_range(sframe_mt, sec->text_start, sec->text_end, sec, GFP_KERNEL); if (ret) { @@ -137,6 +452,13 @@ int sframe_add_section(unsigned long sframe_start, unsigned long sframe_end, return ret; } +static void sframe_free_srcu(struct rcu_head *rcu) +{ + struct sframe_section *sec = container_of(rcu, struct sframe_section, rcu); + + free_section(sec); +} + static int __sframe_remove_section(struct mm_struct *mm, struct sframe_section *sec) { @@ -145,7 +467,7 @@ static int __sframe_remove_section(struct mm_struct *mm, return -EINVAL; } - free_section(sec); + call_srcu(&sframe_srcu, &sec->rcu, sframe_free_srcu); return 0; } diff --git a/kernel/unwind/sframe_debug.h b/kernel/unwind/sframe_debug.h new file mode 100644 index 000000000000..055c8c8fae24 --- /dev/null +++ b/kernel/unwind/sframe_debug.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SFRAME_DEBUG_H +#define _SFRAME_DEBUG_H + +#include <linux/sframe.h> +#include "sframe.h" + +#ifdef CONFIG_DYNAMIC_DEBUG + +#define dbg(fmt, ...) \ + pr_debug("%s (%d): " fmt, current->comm, current->pid, ##__VA_ARGS__) + +static __always_inline void dbg_print_header(struct sframe_section *sec) +{ + unsigned long fdes_end; + + fdes_end = sec->fdes_start + (sec->num_fdes * sizeof(struct sframe_fde)); + + dbg("SEC: sframe:0x%lx-0x%lx text:0x%lx-0x%lx " + "fdes:0x%lx-0x%lx fres:0x%lx-0x%lx " + "ra_off:%d fp_off:%d\n", + sec->sframe_start, sec->sframe_end, sec->text_start, sec->text_end, + sec->fdes_start, fdes_end, sec->fres_start, sec->fres_end, + sec->ra_off, sec->fp_off); +} + +#else /* !CONFIG_DYNAMIC_DEBUG */ + +#define dbg(args...) no_printk(args) + +static inline void dbg_print_header(struct sframe_section *sec) {} + +#endif /* !CONFIG_DYNAMIC_DEBUG */ + +#endif /* _SFRAME_DEBUG_H */ -- 2.48.1
