Hi Tom, On Fri, Oct 23, 2020 at 09:58:34PM -0600, Tom Tromey wrote: > PR 26773 points out that some sleb128 values are decoded incorrectly. > Looking into this, I found some other unusual cases as well. > > In this patch, I chose to try to handle weird leb128 encodings by > preserving their values when possible; or returning the maximum value > on overflow. It isn't clear to me that this is necessarily the best > choice.
I think it is a good thing to accept "padded" leb128 numbers, but probably up to a point. The padding will probably be either mod-4 or mod-8 bytes. So lets at most read up to 16 bytes total. Note we use a Signed-off-by like as described in the CONTRIBUTING document. > --- > .gitignore | 1 + > ChangeLog | 4 + > libdw/ChangeLog | 11 +++ > libdw/dwarf_getlocation.c | 2 +- > libdw/memory-access.h | 153 +++++++++++++++++++----------------- > tests/ChangeLog | 7 ++ > tests/Makefile.am | 8 +- > tests/leb128.c | 161 ++++++++++++++++++++++++++++++++++++++ > 8 files changed, 272 insertions(+), 75 deletions(-) > create mode 100644 tests/leb128.c > > diff --git a/.gitignore b/.gitignore > index c9790941..d737b14d 100644 > --- a/.gitignore > +++ b/.gitignore > @@ -151,6 +151,7 @@ Makefile.in > /tests/get-units-invalid > /tests/get-units-split > /tests/hash > +/tests/leb128 > /tests/line2addr > /tests/low_high_pc > /tests/msg_tst > diff --git a/ChangeLog b/ChangeLog > index 72e8397c..0b50578d 100644 > --- a/ChangeLog > +++ b/ChangeLog > @@ -1,3 +1,7 @@ > +2020-10-23 Tom Tromey <t...@tromey.com> > + > + * .gitignore: Add /tests/leb128. > + > 2020-10-01 Frank Ch. Eigler <f...@redhat.com> Nice, I always forget that. > PR25461 > diff --git a/libdw/ChangeLog b/libdw/ChangeLog > index 8b0b583a..23081c12 100644 > --- a/libdw/ChangeLog > +++ b/libdw/ChangeLog > @@ -1,3 +1,14 @@ > +2020-10-23 Tom Tromey <t...@tromey.com> > + > + PR26773 > + * dwarf_getlocation.c (store_implicit_value): Use > + __libdw_get_uleb128_unchecked. > + * memory-access.h (__libdw_max_len_leb128) > + (__libdw_max_len_uleb128, __libdw_max_len_sleb128): Remove. > + (__libdw_get_uleb128, __libdw_get_uleb128_unchecked): Rewrite. > + (get_sleb128_step): Remove. > + (__libdw_get_sleb128, __libdw_get_sleb128_unchecked): Rewrite. > + > 2020-09-03 Mark Wielaard <m...@klomp.org> > > * dwarf.h: Add DW_CFA_AARCH64_negate_ra_state. > diff --git a/libdw/dwarf_getlocation.c b/libdw/dwarf_getlocation.c > index 4617f9e9..0ce2e08a 100644 > --- a/libdw/dwarf_getlocation.c > +++ b/libdw/dwarf_getlocation.c > @@ -130,7 +130,7 @@ store_implicit_value (Dwarf *dbg, void **cache, Dwarf_Op > *op) > struct loc_block_s *block = libdw_alloc (dbg, struct loc_block_s, > sizeof (struct loc_block_s), 1); > const unsigned char *data = (const unsigned char *) (uintptr_t) > op->number2; > - uint64_t len = __libdw_get_uleb128 (&data, data + len_leb128 (Dwarf_Word)); > + uint64_t len = __libdw_get_uleb128_unchecked (&data); > if (unlikely (len != op->number)) > return -1; > block->addr = op; Well spotted. So when we read the actual Dwarf_Op we have already read len as op->number from the same data location. So this time we can read it "unchecked". Note that we didn't used to do the sanity check len == op->number before commit commit b7a5bc8aa ("libdw: Detect bad DWARF in store_implicit_value.") I think we can also simply drop it again. It really is only an internal sanity check, I don't believe it can actually catch bad DWARF. > diff --git a/libdw/memory-access.h b/libdw/memory-access.h > index a39ad6d2..70317d81 100644 > --- a/libdw/memory-access.h > +++ b/libdw/memory-access.h > @@ -39,29 +39,6 @@ > > #define len_leb128(var) ((8 * sizeof (var) + 6) / 7) > > -static inline size_t > -__libdw_max_len_leb128 (const size_t type_len, > - const unsigned char *addr, const unsigned char *end) > -{ > - const size_t pointer_len = likely (addr < end) ? end - addr : 0; > - return likely (type_len <= pointer_len) ? type_len : pointer_len; > -} > - > -static inline size_t > -__libdw_max_len_uleb128 (const unsigned char *addr, const unsigned char *end) > -{ > - const size_t type_len = len_leb128 (uint64_t); > - return __libdw_max_len_leb128 (type_len, addr, end); > -} > - > -static inline size_t > -__libdw_max_len_sleb128 (const unsigned char *addr, const unsigned char *end) > -{ > - /* Subtract one step, so we don't shift into sign bit. */ > - const size_t type_len = len_leb128 (int64_t) - 1; > - return __libdw_max_len_leb128 (type_len, addr, end); > -} OK, we don't need any of these anymore. > #define get_uleb128_step(var, addr, nth) \ > do { > \ > unsigned char __b = *(addr)++; \ > @@ -79,12 +56,39 @@ __libdw_get_uleb128 (const unsigned char **addrp, const > unsigned char *end) > for the common single-byte case. */ > get_uleb128_step (acc, *addrp, 0); > > - const size_t max = __libdw_max_len_uleb128 (*addrp - 1, end); > - for (size_t i = 1; i < max; ++i) > + const size_t max = len_leb128 (acc) - 1; > + for (size_t i = 1; i < max && *addrp < end; ++i) > get_uleb128_step (acc, *addrp, i); > - /* Other implementations set VALUE to UINT_MAX in this > - case. So we better do this as well. */ > - return UINT64_MAX; > + > + /* A single step to check the final byte, which could cause > + overflow. */ > + if (*addrp < end) > + { > + const unsigned mask = (1u << (8 * sizeof (acc) - 7 * max)) - 1; Shouldn't this be a const unsigned char? Also, this seems to actually me a constant (since *addrp < end, we must have ended up at max). Is the really just (1u << (8 * 8 - 7 * 9)) - 1 = (1u << 1) - 1 = 1 So we are really only interested in the lowest bit? > + if (unlikely ((**addrp & mask) != 0)) > + { > + /* Overflow. */ > + ++*addrp; > + return UINT64_MAX; > + } > + get_uleb128_step (acc, *addrp, max); > + } I must admit I don't fully follow the logic of this last step, but that might be because I am confused about the mask. > + /* Now we can read any number of bytes, as long as the payload is 0. > + Anything else would overflow. */ > + while (*addrp < end) > + { > + unsigned char b = *(*addrp)++; > + if (b == 0x0) > + break; > + if (b != 0x80) > + { > + /* Overflow. */ > + return UINT64_MAX; > + } > + } So I would also restrict this to i < 16 (assuming you keep i above). > + return acc; > } > > static inline uint64_t > @@ -96,65 +100,72 @@ __libdw_get_uleb128_unchecked (const unsigned char > **addrp) > for the common single-byte case. */ > get_uleb128_step (acc, *addrp, 0); > > - const size_t max = len_leb128 (uint64_t); > + const size_t max = len_leb128 (acc) - 1; > for (size_t i = 1; i < max; ++i) > get_uleb128_step (acc, *addrp, i); > - /* Other implementations set VALUE to UINT_MAX in this > - case. So we better do this as well. */ > - return UINT64_MAX; > + > + /* A single step to check the final byte, which could cause > + overflow. */ > + const unsigned mask = (1u << (8 * sizeof (acc) - 7 * max)) - 1; > + if (unlikely ((**addrp & mask) != 0)) > + { > + /* Overflow. */ > + ++*addrp; > + return UINT64_MAX; > + } > + get_uleb128_step (acc, *addrp, max); > + > + /* Now we can read any number of bytes, as long as the payload is 0. > + Anything else would overflow. */ > + while (true) > + { > + unsigned char b = *(*addrp)++; > + if (b == 0x0) > + break; > + if (b != 0x80) > + { > + /* Overflow. */ > + return UINT64_MAX; > + } > + } > + > + return acc; > } Same comments as above. > /* Note, addr needs to me smaller than end. */ > #define get_uleb128(var, addr, end) ((var) = __libdw_get_uleb128 (&(addr), > end)) > #define get_uleb128_unchecked(var, addr) ((var) = > __libdw_get_uleb128_unchecked (&(addr))) > > -/* The signed case is similar, but we sign-extend the result. */ > - > -#define get_sleb128_step(var, addr, nth) \ > - do { > \ > - unsigned char __b = *(addr)++; \ > - if (likely ((__b & 0x80) == 0)) \ > - { > \ > - struct { signed int i:7; } __s = { .i = __b }; \ > - (var) |= (typeof (var)) __s.i * ((typeof (var)) 1 << ((nth) * 7)); \ > - return (var); \ > - } > \ > - (var) |= (typeof (var)) (__b & 0x7f) << ((nth) * 7); \ > - } while (0) > - > static inline int64_t > __libdw_get_sleb128 (const unsigned char **addrp, const unsigned char *end) > { > - int64_t acc = 0; > - > - /* Unroll the first step to help the compiler optimize > - for the common single-byte case. */ > - get_sleb128_step (acc, *addrp, 0); > - > - const size_t max = __libdw_max_len_sleb128 (*addrp - 1, end); > - for (size_t i = 1; i < max; ++i) > - get_sleb128_step (acc, *addrp, i); > - /* Other implementations set VALUE to INT_MAX in this > - case. So we better do this as well. */ > - return INT64_MAX; > + const unsigned char *start = *addrp; > + uint64_t value = __libdw_get_uleb128 (addrp, end); > + if (unlikely (value == UINT64_MAX)) > + return INT64_MAX; Here you need to help me out a bit, wouldn't UINT64_MAX also be the representation of -1? > + if (((*addrp)[-1] & 0x40) != 0) > + { > + size_t nbytes = *addrp - start; > + if (7 * nbytes < 8 * sizeof (value)) > + value |= -(((typeof (value)) 1) << 7 * nbytes); > + } > + return value; > } OK, you can always safely use [-1] because addrp will have been increased at least by one by __libdw_get_uleb128. value is uint64_t so the left shift will always be fine. Negating 1 shifted left one beyond what we believe is the largest but (7 * nbytes) will negate But nbytes can be so large that shifting left will go beyond the end, which we don't want because then we get zero. OK But what I don't fully understand is how this works with a "padded" leb128, won't we possibly forget to negate in that case? > static inline int64_t > __libdw_get_sleb128_unchecked (const unsigned char **addrp) > { > - int64_t acc = 0; > - > - /* Unroll the first step to help the compiler optimize > - for the common single-byte case. */ > - get_sleb128_step (acc, *addrp, 0); > - > - /* Subtract one step, so we don't shift into sign bit. */ > - const size_t max = len_leb128 (int64_t) - 1; > - for (size_t i = 1; i < max; ++i) > - get_sleb128_step (acc, *addrp, i); > - /* Other implementations set VALUE to INT_MAX in this > - case. So we better do this as well. */ > - return INT64_MAX; > + const unsigned char *start = *addrp; > + uint64_t value = __libdw_get_uleb128_unchecked (addrp); > + if (unlikely (value == UINT64_MAX)) > + return INT64_MAX; > + if (((*addrp)[-1] & 0x40) != 0) > + { > + size_t nbytes = *addrp - start; > + if (7 * nbytes < 8 * sizeof (value)) > + value |= -(((typeof (value)) 1) << 7 * nbytes); > + } > + return value; > } Same comments as above. > #define get_sleb128(var, addr, end) ((var) = __libdw_get_sleb128 (&(addr), > end)) > diff --git a/tests/ChangeLog b/tests/ChangeLog > index aa68ffd3..5f075df8 100644 > --- a/tests/ChangeLog > +++ b/tests/ChangeLog > @@ -1,3 +1,10 @@ > +2020-10-22 Tom Tromey <t...@tromey.com> > + > + PR26773 > + * Makefile.am (check_PROGRAMS, TESTS): Add leb128. > + (leb128_LDADD): New variable. > + * leb128.c: New file. > + > 2020-10-20 Frank Ch. Eigler <f...@redhat.com> > > PR26756: more prometheus metrics > diff --git a/tests/Makefile.am b/tests/Makefile.am > index 9d0707da..568a9f8d 100644 > --- a/tests/Makefile.am > +++ b/tests/Makefile.am > @@ -1,6 +1,6 @@ > ## Process this file with automake to create Makefile.in > ## > -## Copyright (C) 1996-2019 Red Hat, Inc. > +## Copyright (C) 1996-2020 Red Hat, Inc. You may claim your own copyright. > ## This file is part of elfutils. > ## > ## This file is free software; you can redistribute it and/or modify > @@ -63,7 +63,7 @@ check_PROGRAMS = arextract arsymtest newfile saridx > scnnames sectiondump \ > all-dwarf-ranges unit-info next_cfi \ > elfcopy addsections xlate_notes elfrdwrnop \ > dwelf_elf_e_machine_string \ > - getphdrnum > + getphdrnum leb128 > > asm_TESTS = asm-tst1 asm-tst2 asm-tst3 asm-tst4 asm-tst5 \ > asm-tst6 asm-tst7 asm-tst8 asm-tst9 > @@ -185,7 +185,8 @@ TESTS = run-arextract.sh run-arsymtest.sh run-ar.sh > newfile test-nlist \ > run-elfclassify.sh run-elfclassify-self.sh \ > run-disasm-riscv64.sh \ > run-pt_gnu_prop-tests.sh \ > - run-getphdrnum.sh run-test-includes.sh > + run-getphdrnum.sh run-test-includes.sh \ > + leb128 > > if !BIARCH > export ELFUTILS_DISABLE_BIARCH = 1 > @@ -694,6 +695,7 @@ xlate_notes_LDADD = $(libelf) > elfrdwrnop_LDADD = $(libelf) > dwelf_elf_e_machine_string_LDADD = $(libelf) $(libdw) > getphdrnum_LDADD = $(libelf) $(libdw) > +leb128_LDADD = $(libelf) $(libdw) Boilerplate looks good. > # We want to test the libelf header against the system elf.h header. > # Don't include any -I CPPFLAGS. Except when we install our own elf.h. > diff --git a/tests/leb128.c b/tests/leb128.c > new file mode 100644 > index 00000000..856a2b89 > --- /dev/null > +++ b/tests/leb128.c > @@ -0,0 +1,161 @@ > +/* Test program for leb128 > + Copyright (C) 2020 Tom Tromey > + This file is part of elfutils. > + > + This file is free software; you can redistribute it and/or modify > + it under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 3 of the License, or > + (at your option) any later version. > + > + elfutils is distributed in the hope that it will be useful, but > + WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + GNU General Public License for more details. > + > + You should have received a copy of the GNU General Public License > + along with this program. If not, see <http://www.gnu.org/licenses/>. */ > + > +#include <config.h> > +#include <stddef.h> > +#include <stdbool.h> > +#include <libdw.h> > +#include <libdwfl.h> > +#include "../libdwelf/libdwelf.h" > +#include "../libdw/libdwP.h" > +#include "../libdwfl/libdwflP.h" > +#include "../libdw/memory-access.h" Slightly surprised you need almost all internal headers, but OK. > +#define OK 0 > +#define FAIL 1 > + > +static const unsigned char v0[] = { 0x0 }; > +static const unsigned char v23[] = { 23 }; > +static const unsigned char vm_2[] = { 0x7e }; > +static const unsigned char v128[] = { 0x80, 0x01 }; > +static const unsigned char vm_128[] = { 0x80, 0x7f }; > +/* This is pathological but nothing seems to prevent it, and maybe > + it would be useful for a (hypothetical) LEB128 relocation. */ > +static const unsigned char v0000[] = > + { > + 0x80, 0x80, 0x80, 0x80, 0x80, > + 0x80, 0x80, 0x80, 0x80, 0x80, > + 0x80, 0x80, 0x80, 0x80, 0x80, > + 0x0 > + }; > +static const unsigned char vhuge[] = > + { > + 0xff, 0xff, 0xff, 0xff, 0xff, > + 0xff, 0xff, 0xff, 0xff, 0x0 > + }; > +static const unsigned char most_positive[] = > + { > + 0xff, 0xff, 0xff, 0xff, 0xff, > + 0xff, 0xff, 0xff, 0x3f > + }; > +static const unsigned char most_negative[] = > + { > + 0x80, 0x80, 0x80, 0x80, 0x80, > + 0x80, 0x80, 0x80, 0x40 > + }; > +static const unsigned char minus_one[] = > + { > + 0xff, 0xff, 0xff, 0xff, 0xff, > + 0xff, 0xff, 0xff, 0x7f > + }; > +static const unsigned char overflow[] = > + { > + 0xff, 0xff, 0xff, 0xff, 0xff, > + 0xff, 0xff, 0xff, 0xbf, 0x01 > + }; > + > +static int > +test_one_sleb (const unsigned char *data, size_t len, int64_t expect) > +{ > + int64_t value; > + const unsigned char *p; > + > + p = data; > + get_sleb128 (value, p, p + len); > + if (value != expect || p != data + len) > + return FAIL; > + > + p = data; > + get_sleb128_unchecked (value, p); > + if (value != expect || p != data + len) > + return FAIL; > + > + return OK; > +} > + > +static int > +test_sleb (void) > +{ > +#define TEST(ARRAY, V) \ > + if (test_one_sleb (ARRAY, sizeof (ARRAY), V) != OK) \ > + return FAIL; > + > + TEST (v0, 0); > + TEST (v23, 23); > + TEST (vm_2, -2); > + TEST (v128, 128); > + TEST (vm_128, -128); > + TEST (v0000, 0); > + TEST (vhuge, 9223372036854775807ll); > + TEST (most_positive, 4611686018427387903ll); > + TEST (most_negative, -4611686018427387904ll); > + TEST (minus_one, -1); > + TEST (overflow, INT64_MAX); > + > +#undef TEST > + > + return OK; > +} > + > +static int > +test_one_uleb (const unsigned char *data, size_t len, uint64_t expect) > +{ > + uint64_t value; > + const unsigned char *p; > + > + p = data; > + get_uleb128 (value, p, p + len); > + if (value != expect || p != data + len) > + return FAIL; > + > + p = data; > + get_uleb128_unchecked (value, p); > + if (value != expect || p != data + len) > + return FAIL; > + > + return OK; > +} > + > +static int > +test_uleb (void) > +{ > +#define TEST(ARRAY, V) \ > + if (test_one_uleb (ARRAY, sizeof (ARRAY), V) != OK) \ > + return FAIL; > + > + TEST (v0, 0); > + TEST (v23, 23); > + TEST (vm_2, 126); > + TEST (v128, 128); > + TEST (vm_128, 16256); > + TEST (v0000, 0); > + TEST (vhuge, 9223372036854775807ull); > + TEST (most_positive, 4611686018427387903ull); > + TEST (most_negative, 4611686018427387904ull); > + TEST (minus_one, 9223372036854775807ull); > + TEST (overflow, UINT64_MAX); > + > +#undef TEST > + > + return OK; > +} > + > +int > +main (void) > +{ > + return test_sleb () || test_uleb (); > +} Very nice to have tests for this. Maybe we can have some extra "padding" variants for most_postitive/most_negative? Thanks, Mark