On 5/5/2026 2:17 PM, Jens Remus wrote:
> From: Josh Poimboeuf <[email protected]>
> 
> In preparation for using sframe to unwind user space stacks, add an
> sframe_find() interface for finding the sframe information associated
> with a given text address.
> 
> For performance, use user_read_access_begin() and the corresponding
> unsafe_*() accessors.  Note that use of pr_debug() in uaccess-enabled
> regions would break noinstr validation, so there aren't any debug
> messages yet.  That will be added in a subsequent commit.
> 
> Link: 
> https://lore.kernel.org/all/77c0d1ec143bf2a53d66c4ecb190e7e0a576fbfd.1737511963.git.jpoim...@kernel.org/
> Link: 
> https://lore.kernel.org/all/[email protected]/
> 
> [ Jens Remus: Add initial support for SFrame V3 (limited to regular
> FDEs).  Add support for PC-relative FDE function start offset.  Simplify
> logic by using an internal FDE representation.  Rename struct sframe_fre
> to sframe_fre_internal to align with struct sframe_fde_internal.
> Cleanup includes.  Fix checkpatch errors "spaces required around that
> ':'". ]

> diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c

> +static __always_inline int __read_fde(struct sframe_section *sec,
> +                                   unsigned int fde_num,
> +                                   struct sframe_fde_internal *fde)
> +{
> +     unsigned long fde_addr, fda_addr, func_addr;

        unsigned long fde_addr, fda_addr, func_start, func_end;

> +     struct sframe_fde_v3 _fde;
> +     struct sframe_fda_v3 _fda;
> +
> +     fde_addr = sec->fdes_start + (fde_num * sizeof(struct sframe_fde_v3));
> +     unsafe_copy_from_user(&_fde, (void __user *)fde_addr,
> +                           sizeof(struct sframe_fde_v3), Efault);
> +
> +     func_addr = fde_addr + _fde.func_start_off;
> +     if (func_addr < sec->text_start || func_addr >= sec->text_end)
> +             return -EINVAL;

        func_start = fde_addr + _fde.func_start_off;
        func_end = func_start + _fde.func_size;
        if (func_start < sec->text_start || func_end > sec->text_end)
                return -EINVAL;

This would validate that the whole function described by the FDE is
within the text section and not only the function start.


Note that, unrelated to above change, this check in general causes
sframe_validate_section() to fail, if one sframe section covers more
than one text section (unrelated to whether it is actually registered
for multiple text sections), for instance in case of Dylan's sframe
kernel stacktracer on arm64.  Should the check therefore be made
conditional on whether __read_fde() is called from __find_fde() or
sframe_validate_section()?  Or shall we drop this check as it does not
provide that much benefit during normal stacktracing use:
- sframe_find() obtains the struct sframe_section *sec from the
  mm->sframe_mt based on IP.  So IP must be within sec->text_start and
  sec->text_end.
- __find_fde() only returns a FDE, if the IP is within
  [fde->func_addr, fde->func_addr + fde->func_size[.
Dropping the check would allow the function start/end to be outside the
text section.

> +
> +     fda_addr = sec->fres_start + _fde.fres_off;
> +     if (fda_addr + sizeof(struct sframe_fda_v3) > sec->fres_end)
> +             return -EINVAL;
> +     unsafe_copy_from_user(&_fda, (void __user *)fda_addr,
> +                           sizeof(struct sframe_fda_v3), Efault);

Can unsafe_copy_from_user() be used for unaligned fda_addr, at least
on x86-64, s390 64-bit, and amr64?


Do the FDE type, FDE PC type, and FRE type values need to be validated
here as well?

        unsigned char fde_type = SFRAME_V3_FDE_TYPE(_fda.info2);
        unsigned char fde_pctype = SFRAME_V3_FDE_PCTYPE(_fda.info);
        unsigned char fre_type = SFRAME_V3_FDE_FRE_TYPE(_fda.info);

The FDE type would get validatd by __read_fre_datawords(), which is
called after __read_fde(), if the read FDE is the one of interest.
So that does not neccessarily need to be checked here.  Do you agree?

The FDE PC type is currently not checked for supported values anywhere.
That one would make sense to be checked here:

        if (fde_pctype != SFRAME_FDE_PCTYPE_INC &&
            fde_pctype != SFRAME_FDE_PCTYPE_MASK)
                return -EINVAL;

The FRE type would get validated by __read_fre(), which is called
somewhere down the line after __read_fde().  So that does not
neccesarily need to be checked here.   Do you agree?

> +
> +     fde->func_addr  = func_addr;

        fde->func_addr  = func_start;

> +     fde->func_size  = _fde.func_size;
> +     fde->fda_off    = _fde.fres_off;
> +     fde->fres_off   = _fde.fres_off + sizeof(struct sframe_fda_v3);
> +     fde->fres_num   = _fda.fres_num;
> +     fde->info       = _fda.info;
> +     fde->info2      = _fda.info2;
> +     fde->rep_size   = _fda.rep_size;
> +
> +     return 0;
> +
> +Efault:
> +     return -EFAULT;
> +}

> +static __always_inline int __read_fre(struct sframe_section *sec,
> +                                   struct sframe_fde_internal *fde,
> +                                   unsigned long fre_addr,
> +                                   struct sframe_fre_internal *fre)
> +{
> +     unsigned char fde_type = SFRAME_V3_FDE_TYPE(fde->info2);
> +     unsigned char fde_pctype = SFRAME_V3_FDE_PCTYPE(fde->info);
> +     unsigned char fre_type = SFRAME_V3_FDE_FRE_TYPE(fde->info);
> +     unsigned char dataword_count, dataword_size;
> +     s32 cfa_off, ra_off, fp_off;
> +     unsigned long cur = fre_addr;
> +     unsigned char addr_size;
> +     u32 ip_off;
> +     u8 info;
> +
> +     addr_size = fre_type_to_size(fre_type);
> +     if (!addr_size)
> +             return -EFAULT;
> +
> +     if (fre_addr + addr_size + 1 > sec->fres_end)
> +             return -EFAULT;
> +
> +     UNSAFE_GET_USER_INC(ip_off, cur, addr_size, Efault);
> +     if (fde_pctype == SFRAME_FDE_PCTYPE_INC && ip_off > fde->func_size)

        if ((fde_pctype == SFRAME_FDE_PCTYPE_INC && ip_off >= fde->func_size) ||
            (fde_pctype == SFRAME_FDE_PCTYPE_MASK && ip_off >= fde->rep_size))

For PCTYPE_INC the FRE IP offset must be less than the FDE function size.
For PCTYPE_MASK the FRE IP offset must be less than the FDE repetition size.

> +             return -EFAULT;
> +
> +     UNSAFE_GET_USER_INC(info, cur, 1, Efault);
> +     dataword_count = SFRAME_V3_FRE_DATAWORD_COUNT(info);
> +     dataword_size  = 
> dataword_size_enum_to_size(SFRAME_V3_FRE_DATAWORD_SIZE(info));
> +     if (!dataword_count || !dataword_size)
> +             return -EFAULT;
> +
> +     if (cur + (dataword_count * dataword_size) > sec->fres_end)
> +             return -EFAULT;
> +
> +     /* TODO: Support for flexible FDEs not implemented yet. */
> +     if (fde_type != SFRAME_FDE_TYPE_DEFAULT)
> +             return -EFAULT;
> +
> +     UNSAFE_GET_USER_INC(cfa_off, cur, dataword_size, Efault);
> +     dataword_count--;
> +
> +     ra_off = sec->ra_off;
> +     if (!ra_off) {
> +             if (!dataword_count--)
> +                     return -EFAULT;
> +
> +             UNSAFE_GET_USER_INC(ra_off, cur, dataword_size, Efault);
> +     }
> +
> +     fp_off = sec->fp_off;
> +     if (!fp_off && dataword_count) {
> +             dataword_count--;
> +             UNSAFE_GET_USER_INC(fp_off, cur, dataword_size, Efault);
> +     }
> +
> +     if (dataword_count)
> +             return -EFAULT;
> +
> +     fre->size       = addr_size + 1 + (dataword_count * dataword_size);
> +     fre->ip_off     = ip_off;
> +     fre->cfa_off    = cfa_off;
> +     fre->ra_off     = ra_off;
> +     fre->fp_off     = fp_off;
> +     fre->info       = info;
> +
> +     return 0;
> +
> +Efault:
> +     return -EFAULT;
> +}
Thanks and regards,
Jens
-- 
Jens Remus
Linux on Z Development (D3303)
[email protected] / [email protected]

IBM Deutschland Research & Development GmbH; Vorsitzender des Aufsichtsrats: 
Wolfgang Wendt; Geschäftsführung: David Faller; Sitz der Gesellschaft: 
Ehningen; Registergericht: Amtsgericht Stuttgart, HRB 243294
IBM Data Privacy Statement: https://www.ibm.com/privacy/


Reply via email to