On 13 Nov 2021, at 19:06, Konstantin Belousov <kostik...@gmail.com> wrote:
> On Sat, Nov 13, 2021 at 08:59:00PM +0200, Konstantin Belousov wrote:
>> On Sat, Nov 13, 2021 at 06:29:24PM +0000, Jessica Clarke wrote:
>>> On 13 Nov 2021, at 17:57, Jessica Clarke <jrt...@freebsd.org> wrote:
>>>> 
>>>> On 13 Nov 2021, at 17:54, Jessica Clarke <jrt...@freebsd.org> wrote:
>>>>> 
>>>>> On 13 Nov 2021, at 17:33, Konstantin Belousov <k...@freebsd.org> wrote:
>>>>>> 
>>>>>> The branch main has been updated by kib:
>>>>>> 
>>>>>> URL: 
>>>>>> https://cgit.FreeBSD.org/src/commit/?id=64ba1f4cf3a6847a1dacf4bab0409d94898fa168
>>>>>> 
>>>>>> commit 64ba1f4cf3a6847a1dacf4bab0409d94898fa168
>>>>>> Author:     Konstantin Belousov <k...@freebsd.org>
>>>>>> AuthorDate: 2021-11-13 01:18:13 +0000
>>>>>> Commit:     Konstantin Belousov <k...@freebsd.org>
>>>>>> CommitDate: 2021-11-13 17:33:13 +0000
>>>>>> 
>>>>>> rtld: Implement LD_SHOW_AUXV
>>>>>> 
>>>>>> It dumps auxv as seen by interpreter, right before starting any user
>>>>>> code.
>>>>>> 
>>>>>> Copied from:    glibc
>>>>>> Sponsored by:   The FreeBSD Foundation
>>>>>> MFC after:      1 week
>>>>>> ---
>>>>>> libexec/rtld-elf/rtld.1 |  7 +++++-
>>>>>> libexec/rtld-elf/rtld.c | 67 
>>>>>> +++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>> 2 files changed, 73 insertions(+), 1 deletion(-)
>>>>>> 
>>>>>> diff --git a/libexec/rtld-elf/rtld.1 b/libexec/rtld-elf/rtld.1
>>>>>> index 187dc105667a..66aa2bdabd17 100644
>>>>>> --- a/libexec/rtld-elf/rtld.1
>>>>>> +++ b/libexec/rtld-elf/rtld.1
>>>>>> @@ -28,7 +28,7 @@
>>>>>> .\"
>>>>>> .\" $FreeBSD$
>>>>>> .\"
>>>>>> -.Dd August 15, 2021
>>>>>> +.Dd November 13, 2021
>>>>>> .Dt RTLD 1
>>>>>> .Os
>>>>>> .Sh NAME
>>>>>> @@ -309,6 +309,11 @@ will process the filtee dependencies of the loaded 
>>>>>> objects immediately,
>>>>>> instead of postponing it until required.
>>>>>> Normally, the filtees are opened at the time of the first symbol 
>>>>>> resolution
>>>>>> from the filter object.
>>>>>> +.It Ev LD_SHOW_AUXV
>>>>>> +If set, causes
>>>>>> +.Nm
>>>>>> +to dump content of the aux vector to standard output, before passing
>>>>>> +control to any user code.
>>>>>> .El
>>>>>> .Sh DIRECT EXECUTION MODE
>>>>>> .Nm
>>>>>> diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c
>>>>>> index c173c5a6e22e..0475134b0d96 100644
>>>>>> --- a/libexec/rtld-elf/rtld.c
>>>>>> +++ b/libexec/rtld-elf/rtld.c
>>>>>> @@ -104,6 +104,7 @@ static Obj_Entry *dlopen_object(const char *name, 
>>>>>> int fd, Obj_Entry *refobj,
>>>>>> static Obj_Entry *do_load_object(int, const char *, char *, struct stat 
>>>>>> *, int);
>>>>>> static int do_search_info(const Obj_Entry *obj, int, struct dl_serinfo 
>>>>>> *);
>>>>>> static bool donelist_check(DoneList *, const Obj_Entry *);
>>>>>> +static void dump_auxv(Elf_Auxinfo **aux_info);
>>>>>> static void errmsg_restore(struct dlerror_save *);
>>>>>> static struct dlerror_save *errmsg_save(void);
>>>>>> static void *fill_search_info(const char *, size_t, void *);
>>>>>> @@ -364,6 +365,7 @@ enum {
>>>>>>  LD_TRACE_LOADED_OBJECTS_FMT1,
>>>>>>  LD_TRACE_LOADED_OBJECTS_FMT2,
>>>>>>  LD_TRACE_LOADED_OBJECTS_ALL,
>>>>>> +        LD_SHOW_AUXV,
>>>>>> };
>>>>>> 
>>>>>> struct ld_env_var_desc {
>>>>>> @@ -396,6 +398,7 @@ static struct ld_env_var_desc ld_env_vars[] = {
>>>>>>  LD_ENV_DESC(TRACE_LOADED_OBJECTS_FMT1, false),
>>>>>>  LD_ENV_DESC(TRACE_LOADED_OBJECTS_FMT2, false),
>>>>>>  LD_ENV_DESC(TRACE_LOADED_OBJECTS_ALL, false),
>>>>>> +        LD_ENV_DESC(SHOW_AUXV, false),
>>>>>> };
>>>>>> 
>>>>>> static const char *
>>>>>> @@ -857,6 +860,9 @@ _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, 
>>>>>> Obj_Entry **objp)
>>>>>>  if (rtld_verify_versions(&list_main) == -1 && !ld_tracing)
>>>>>>  rtld_die();
>>>>>> 
>>>>>> +    if (ld_get_env_var(LD_SHOW_AUXV) != NULL)
>>>>>> +       dump_auxv(aux_info);
>>>>>> +
>>>>>>  if (ld_tracing) {               /* We're done */
>>>>>>  trace_loaded_objects(obj_main);
>>>>>>  exit(0);
>>>>>> @@ -6058,6 +6064,67 @@ print_usage(const char *argv0)
>>>>>>      "  <args>    Arguments to the executed process\n", argv0);
>>>>>> }
>>>>>> 
>>>>>> +#define AUXFMT(at, xfmt) [at] = { .name = #at, .fmt = xfmt }
>>>>>> +static const struct auxfmt {
>>>>>> +        const char *name;
>>>>>> +        const char *fmt;
>>>>>> +} auxfmts[] = {
>>>>>> +        AUXFMT(AT_NULL, NULL),
>>>>>> +        AUXFMT(AT_IGNORE, NULL),
>>>>>> +        AUXFMT(AT_EXECFD, "%d"),
>>>>>> +        AUXFMT(AT_PHDR, "%p"),
>>>>>> +        AUXFMT(AT_PHENT, "%u"),
>>>>>> +        AUXFMT(AT_PHNUM, "%u"),
>>>>>> +        AUXFMT(AT_PAGESZ, "%u"),
>>>>>> +        AUXFMT(AT_BASE, "%#lx"),
>>>>>> +        AUXFMT(AT_FLAGS, "%#lx"),
>>>>>> +        AUXFMT(AT_ENTRY, "%p"),
>>>>>> +        AUXFMT(AT_NOTELF, NULL),
>>>>>> +        AUXFMT(AT_UID, "%d"),
>>>>>> +        AUXFMT(AT_EUID, "%d"),
>>>>>> +        AUXFMT(AT_GID, "%d"),
>>>>>> +        AUXFMT(AT_EGID, "%d"),
>>>>>> +        AUXFMT(AT_EXECPATH, "%s"),
>>>>>> +        AUXFMT(AT_CANARY, "%p"),
>>>>>> +        AUXFMT(AT_CANARYLEN, "%u"),
>>>>>> +        AUXFMT(AT_OSRELDATE, "%u"),
>>>>>> +        AUXFMT(AT_NCPUS, "%u"),
>>>>>> +        AUXFMT(AT_PAGESIZES, "%p"),
>>>>>> +        AUXFMT(AT_PAGESIZESLEN, "%u"),
>>>>>> +        AUXFMT(AT_TIMEKEEP, "%p"),
>>>>>> +        AUXFMT(AT_STACKPROT, "%#x"),
>>>>>> +        AUXFMT(AT_EHDRFLAGS, "%#lx"),
>>>>>> +        AUXFMT(AT_HWCAP, "%#lx"),
>>>>>> +        AUXFMT(AT_HWCAP2, "%#lx"),
>>>>>> +        AUXFMT(AT_BSDFLAGS, "%#lx"),
>>>>>> +        AUXFMT(AT_ARGC, "%u"),
>>>>>> +        AUXFMT(AT_ARGV, "%p"),
>>>>>> +        AUXFMT(AT_ENVC, "%p"),
>>>>>> +        AUXFMT(AT_ENVV, "%p"),
>>>>>> +        AUXFMT(AT_PS_STRINGS, "%p"),
>>>>>> +        AUXFMT(AT_FXRNG, "%p"),
>>>>>> +};
>>>>>> +
>>>>>> +static void
>>>>>> +dump_auxv(Elf_Auxinfo **aux_info)
>>>>>> +{
>>>>>> +        Elf_Auxinfo *auxp;
>>>>>> +        const struct auxfmt *fmt;
>>>>>> +        int i;
>>>>>> +
>>>>>> +        for (i = 0; i < AT_COUNT; i++) {
>>>>>> +                auxp = aux_info[i];
>>>>>> +                if (auxp == NULL)
>>>>>> +                        continue;
>>>>>> +                fmt = &auxfmts[i];
>>>>>> +                if (fmt->fmt == NULL)
>>>>>> +                        continue;
>>>>>> +                rtld_fdprintf(STDOUT_FILENO, "%s:\t", fmt->name);
>>>>>> +                rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, 
>>>>>> auxp->a_un.a_ptr);
>>>>>> +                rtld_fdprintf(STDOUT_FILENO, "\n");
>>>>> 
>>>>> This is undefined behaviour, breaks CHERI, and is totally unnecessary. You
>>>>> have a handful of cases here, just make an enum and have separate
>>>>> rtld_fdprintf calls.
>>> 
>>> In particular, ignoring CHERI, unsigned ints are sign-extended to 64
>>> bits on MIPS and RISC-V. Thus by passing a 64-bit value but using a %u,
>>> you are violating the calling convention. I can’t currently get GCC or
>>> Clang to exploit the fact that varargs arguments are sign-extended, but
>>> on MIPS, and with GCC on RISC-V, there is a redundant sext.w that could
>>> legally be optimised out, and doing so would break code that commits
>>> this calling convention violation. (Clang is currently stupid and
>>> round-trips via memory even when the va_arg calls have no branching
>>> surrounding them, rather than just grabbing from the register.)
>> I might understand the argument that all non-pointer formats for auxv
>> should be longs, i.e. %lu/%ld/%lx, but this is the only problem I see
>> there. We do rely on having specific representations for addresses and
>> longs, and a low-level component such as rtld has every right to rely
>> on this fact, the same as the VM subsystem or memory allocators.
>> 
>> In fact ELF spec exercises this as well.
>> Our arches are either ILP32 or LP64.
>> 
>>> 
>>> Then CHERI makes it worse because a_ptr and a_val do not have the same
>>> representation, although in practice I think passing a_ptr and nothing
>>> further does end up working on CHERI-RISC-V and Morello, just not
>>> CHERI-MIPS due to being big-endian.
> 
> Ok, the following should be enough for CHERI, right?
> 
> diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c
> index 0475134b0d96..cf467ae7aacd 100644
> --- a/libexec/rtld-elf/rtld.c
> +++ b/libexec/rtld-elf/rtld.c
> @@ -6071,33 +6071,33 @@ static const struct auxfmt {
> } auxfmts[] = {
>       AUXFMT(AT_NULL, NULL),
>       AUXFMT(AT_IGNORE, NULL),
> -     AUXFMT(AT_EXECFD, "%d"),
> +     AUXFMT(AT_EXECFD, "%ld"),
>       AUXFMT(AT_PHDR, "%p"),
> -     AUXFMT(AT_PHENT, "%u"),
> -     AUXFMT(AT_PHNUM, "%u"),
> -     AUXFMT(AT_PAGESZ, "%u"),
> +     AUXFMT(AT_PHENT, "%lu"),
> +     AUXFMT(AT_PHNUM, "%lu"),
> +     AUXFMT(AT_PAGESZ, "%lu"),
>       AUXFMT(AT_BASE, "%#lx"),
>       AUXFMT(AT_FLAGS, "%#lx"),
>       AUXFMT(AT_ENTRY, "%p"),
>       AUXFMT(AT_NOTELF, NULL),
> -     AUXFMT(AT_UID, "%d"),
> -     AUXFMT(AT_EUID, "%d"),
> -     AUXFMT(AT_GID, "%d"),
> -     AUXFMT(AT_EGID, "%d"),
> +     AUXFMT(AT_UID, "%ld"),
> +     AUXFMT(AT_EUID, "%ld"),
> +     AUXFMT(AT_GID, "%ld"),
> +     AUXFMT(AT_EGID, "%ld"),
>       AUXFMT(AT_EXECPATH, "%s"),
>       AUXFMT(AT_CANARY, "%p"),
> -     AUXFMT(AT_CANARYLEN, "%u"),
> -     AUXFMT(AT_OSRELDATE, "%u"),
> -     AUXFMT(AT_NCPUS, "%u"),
> +     AUXFMT(AT_CANARYLEN, "%lu"),
> +     AUXFMT(AT_OSRELDATE, "%lu"),
> +     AUXFMT(AT_NCPUS, "%lu"),
>       AUXFMT(AT_PAGESIZES, "%p"),
> -     AUXFMT(AT_PAGESIZESLEN, "%u"),
> +     AUXFMT(AT_PAGESIZESLEN, "%lu"),
>       AUXFMT(AT_TIMEKEEP, "%p"),
> -     AUXFMT(AT_STACKPROT, "%#x"),
> +     AUXFMT(AT_STACKPROT, "%#lx"),
>       AUXFMT(AT_EHDRFLAGS, "%#lx"),
>       AUXFMT(AT_HWCAP, "%#lx"),
>       AUXFMT(AT_HWCAP2, "%#lx"),
>       AUXFMT(AT_BSDFLAGS, "%#lx"),
> -     AUXFMT(AT_ARGC, "%u"),
> +     AUXFMT(AT_ARGC, "%lu"),
>       AUXFMT(AT_ARGV, "%p"),
>       AUXFMT(AT_ENVC, "%p"),
>       AUXFMT(AT_ENVV, "%p"),
> @@ -6105,6 +6105,15 @@ static const struct auxfmt {
>       AUXFMT(AT_FXRNG, "%p"),
> };
> 
> +static bool
> +is_ptr_fmt(const char *fmt)
> +{
> +     char last;
> +
> +     last = fmt[strlen(fmt) - 1];
> +     return (last == 'p' || last == 's');
> +}
> +
> static void
> dump_auxv(Elf_Auxinfo **aux_info)
> {
> @@ -6120,7 +6129,8 @@ dump_auxv(Elf_Auxinfo **aux_info)
>               if (fmt->fmt == NULL)
>                       continue;
>               rtld_fdprintf(STDOUT_FILENO, "%s:\t", fmt->name);
> -             rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, auxp->a_un.a_ptr);
> +             rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, is_ptr_fmt(fmt->fmt) ?
> +                 auxp->a_un.a_ptr : auxp->a_un.a_val);
>               rtld_fdprintf(STDOUT_FILENO, "\n");
>       }
> }

That should indeed work, though I’d argue it’s still not as nice as
avoiding rtld_fdprintfx entirely.

Jess


Reply via email to