Re: [PATCH] Fix leb128 reading

Mark Wielaard Sat, 24 Oct 2020 14:56:28 -0700

Hi Tom,

On Fri, Oct 23, 2020 at 09:58:34PM -0600, Tom Tromey wrote:
> PR 26773 points out that some sleb128 values are decoded incorrectly.
> Looking into this, I found some other unusual cases as well.
> 
> In this patch, I chose to try to handle weird leb128 encodings by
> preserving their values when possible; or returning the maximum value
> on overflow.  It isn't clear to me that this is necessarily the best
> choice.


I think it is a good thing to accept "padded" leb128 numbers, but
probably up to a point. The padding will probably be either mod-4 or
mod-8 bytes. So lets at most read up to 16 bytes total.

Note we use a Signed-off-by like as described in the CONTRIBUTING
document.

> ---
>  .gitignore                |   1 +
>  ChangeLog                 |   4 +
>  libdw/ChangeLog           |  11 +++
>  libdw/dwarf_getlocation.c |   2 +-
>  libdw/memory-access.h     | 153 +++++++++++++++++++-----------------
>  tests/ChangeLog           |   7 ++
>  tests/Makefile.am         |   8 +-
>  tests/leb128.c            | 161 ++++++++++++++++++++++++++++++++++++++
>  8 files changed, 272 insertions(+), 75 deletions(-)
>  create mode 100644 tests/leb128.c
> 
> diff --git a/.gitignore b/.gitignore
> index c9790941..d737b14d 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -151,6 +151,7 @@ Makefile.in
>  /tests/get-units-invalid
>  /tests/get-units-split
>  /tests/hash
> +/tests/leb128
>  /tests/line2addr
>  /tests/low_high_pc
>  /tests/msg_tst
> diff --git a/ChangeLog b/ChangeLog
> index 72e8397c..0b50578d 100644
> --- a/ChangeLog
> +++ b/ChangeLog
> @@ -1,3 +1,7 @@
> +2020-10-23  Tom Tromey  <[email protected]>
> +
> +     * .gitignore: Add /tests/leb128.
> +
>  2020-10-01  Frank Ch. Eigler  <[email protected]>

Nice, I always forget that.

>       PR25461
> diff --git a/libdw/ChangeLog b/libdw/ChangeLog
> index 8b0b583a..23081c12 100644
> --- a/libdw/ChangeLog
> +++ b/libdw/ChangeLog
> @@ -1,3 +1,14 @@
> +2020-10-23  Tom Tromey  <[email protected]>
> +
> +     PR26773
> +     * dwarf_getlocation.c (store_implicit_value): Use
> +     __libdw_get_uleb128_unchecked.
> +     * memory-access.h (__libdw_max_len_leb128)
> +     (__libdw_max_len_uleb128, __libdw_max_len_sleb128): Remove.
> +     (__libdw_get_uleb128, __libdw_get_uleb128_unchecked): Rewrite.
> +     (get_sleb128_step): Remove.
> +     (__libdw_get_sleb128, __libdw_get_sleb128_unchecked): Rewrite.
> +
>  2020-09-03  Mark Wielaard  <[email protected]>
>  
>       * dwarf.h: Add DW_CFA_AARCH64_negate_ra_state.
> diff --git a/libdw/dwarf_getlocation.c b/libdw/dwarf_getlocation.c
> index 4617f9e9..0ce2e08a 100644
> --- a/libdw/dwarf_getlocation.c
> +++ b/libdw/dwarf_getlocation.c
> @@ -130,7 +130,7 @@ store_implicit_value (Dwarf *dbg, void **cache, Dwarf_Op 
> *op)
>    struct loc_block_s *block = libdw_alloc (dbg, struct loc_block_s,
>                                          sizeof (struct loc_block_s), 1);
>    const unsigned char *data = (const unsigned char *) (uintptr_t) 
> op->number2;
> -  uint64_t len = __libdw_get_uleb128 (&data, data + len_leb128 (Dwarf_Word));
> +  uint64_t len = __libdw_get_uleb128_unchecked (&data);
>    if (unlikely (len != op->number))
>      return -1;
>    block->addr = op;

Well spotted. So when we read the actual Dwarf_Op we have already read
len as op->number from the same data location. So this time we can
read it "unchecked". Note that we didn't used to do the sanity check
len == op->number before commit commit b7a5bc8aa ("libdw: Detect bad
DWARF in store_implicit_value.") I think we can also simply drop it
again. It really is only an internal sanity check, I don't believe it
can actually catch bad DWARF.

> diff --git a/libdw/memory-access.h b/libdw/memory-access.h
> index a39ad6d2..70317d81 100644
> --- a/libdw/memory-access.h
> +++ b/libdw/memory-access.h
> @@ -39,29 +39,6 @@
>  
>  #define len_leb128(var) ((8 * sizeof (var) + 6) / 7)
>  
> -static inline size_t
> -__libdw_max_len_leb128 (const size_t type_len,
> -                     const unsigned char *addr, const unsigned char *end)
> -{
> -  const size_t pointer_len = likely (addr < end) ? end - addr : 0;
> -  return likely (type_len <= pointer_len) ? type_len : pointer_len;
> -}
> -
> -static inline size_t
> -__libdw_max_len_uleb128 (const unsigned char *addr, const unsigned char *end)
> -{
> -  const size_t type_len = len_leb128 (uint64_t);
> -  return __libdw_max_len_leb128 (type_len, addr, end);
> -}
> -
> -static inline size_t
> -__libdw_max_len_sleb128 (const unsigned char *addr, const unsigned char *end)
> -{
> -  /* Subtract one step, so we don't shift into sign bit.  */
> -  const size_t type_len = len_leb128 (int64_t) - 1;
> -  return __libdw_max_len_leb128 (type_len, addr, end);
> -}

OK, we don't need any of these anymore.

>  #define get_uleb128_step(var, addr, nth)                                   \
>    do {                                                                       
>       \
>      unsigned char __b = *(addr)++;                                         \
> @@ -79,12 +56,39 @@ __libdw_get_uleb128 (const unsigned char **addrp, const 
> unsigned char *end)
>       for the common single-byte case.  */
>    get_uleb128_step (acc, *addrp, 0);
>  
> -  const size_t max = __libdw_max_len_uleb128 (*addrp - 1, end);
> -  for (size_t i = 1; i < max; ++i)
> +  const size_t max = len_leb128 (acc) - 1;
> +  for (size_t i = 1; i < max && *addrp < end; ++i)
>      get_uleb128_step (acc, *addrp, i);
> -  /* Other implementations set VALUE to UINT_MAX in this
> -     case.  So we better do this as well.  */
> -  return UINT64_MAX;
> +
> +  /* A single step to check the final byte, which could cause
> +     overflow.  */
> +  if (*addrp < end)
> +    {
> +      const unsigned mask = (1u << (8 * sizeof (acc) - 7 * max)) - 1;

Shouldn't this be a const unsigned char?
Also, this seems to actually me a constant (since *addrp < end, we must have 
ended up at max).
Is the really just (1u << (8 * 8 - 7 * 9)) - 1 = (1u << 1) - 1 = 1
So we are really only interested in the lowest bit?

> +      if (unlikely ((**addrp & mask) != 0))
> +     {
> +       /* Overflow.  */
> +       ++*addrp;
> +       return UINT64_MAX;
> +     }
> +      get_uleb128_step (acc, *addrp, max);
> +    }

I must admit I don't fully follow the logic of this last step, but
that might be because I am confused about the mask.

> +  /* Now we can read any number of bytes, as long as the payload is 0.
> +     Anything else would overflow.  */
> +  while (*addrp < end)
> +    {
> +      unsigned char b = *(*addrp)++;
> +      if (b == 0x0)
> +     break;
> +      if (b != 0x80)
> +     {
> +       /* Overflow.  */
> +       return UINT64_MAX;
> +     }
> +    }

So I would also restrict this to i < 16 (assuming you keep i above).

> +  return acc;
>  }
>  
>  static inline uint64_t
> @@ -96,65 +100,72 @@ __libdw_get_uleb128_unchecked (const unsigned char 
> **addrp)
>       for the common single-byte case.  */
>    get_uleb128_step (acc, *addrp, 0);
>  
> -  const size_t max = len_leb128 (uint64_t);
> +  const size_t max = len_leb128 (acc) - 1;
>    for (size_t i = 1; i < max; ++i)
>      get_uleb128_step (acc, *addrp, i);
> -  /* Other implementations set VALUE to UINT_MAX in this
> -     case.  So we better do this as well.  */
> -  return UINT64_MAX;
> +
> +  /* A single step to check the final byte, which could cause
> +     overflow.  */
> +  const unsigned mask = (1u << (8 * sizeof (acc) - 7 * max)) - 1;
> +  if (unlikely ((**addrp & mask) != 0))
> +    {
> +      /* Overflow.  */
> +      ++*addrp;
> +      return UINT64_MAX;
> +    }
> +  get_uleb128_step (acc, *addrp, max);
> +
> +  /* Now we can read any number of bytes, as long as the payload is 0.
> +     Anything else would overflow.  */
> +  while (true)
> +    {
> +      unsigned char b = *(*addrp)++;
> +      if (b == 0x0)
> +     break;
> +      if (b != 0x80)
> +     {
> +       /* Overflow.  */
> +       return UINT64_MAX;
> +     }
> +    }
> +
> +  return acc;
>  }

Same comments as above.

>  /* Note, addr needs to me smaller than end. */
>  #define get_uleb128(var, addr, end) ((var) = __libdw_get_uleb128 (&(addr), 
> end))
>  #define get_uleb128_unchecked(var, addr) ((var) = 
> __libdw_get_uleb128_unchecked (&(addr)))
>  
> -/* The signed case is similar, but we sign-extend the result.  */
> -
> -#define get_sleb128_step(var, addr, nth)                                   \
> -  do {                                                                       
>       \
> -    unsigned char __b = *(addr)++;                                         \
> -    if (likely ((__b & 0x80) == 0))                                        \
> -      {                                                                      
>       \
> -     struct { signed int i:7; } __s = { .i = __b };                        \
> -     (var) |= (typeof (var)) __s.i * ((typeof (var)) 1 << ((nth) * 7));    \
> -     return (var);                                                         \
> -      }                                                                      
>       \
> -    (var) |= (typeof (var)) (__b & 0x7f) << ((nth) * 7);                   \
> -  } while (0)
> -
>  static inline int64_t
>  __libdw_get_sleb128 (const unsigned char **addrp, const unsigned char *end)
>  {
> -  int64_t acc = 0;
> -
> -  /* Unroll the first step to help the compiler optimize
> -     for the common single-byte case.  */
> -  get_sleb128_step (acc, *addrp, 0);
> -
> -  const size_t max = __libdw_max_len_sleb128 (*addrp - 1, end);
> -  for (size_t i = 1; i < max; ++i)
> -    get_sleb128_step (acc, *addrp, i);
> -  /* Other implementations set VALUE to INT_MAX in this
> -     case.  So we better do this as well.  */
> -  return INT64_MAX;
> +  const unsigned char *start = *addrp;
> +  uint64_t value = __libdw_get_uleb128 (addrp, end);
> +  if (unlikely (value == UINT64_MAX))
> +    return INT64_MAX;

Here you need to help me out a bit, wouldn't UINT64_MAX also be the
representation of -1?

> +  if (((*addrp)[-1] & 0x40) != 0)
> +    {
> +      size_t nbytes = *addrp - start;
> +      if (7 * nbytes < 8 * sizeof (value))
> +     value |= -(((typeof (value)) 1) << 7 * nbytes);
> +    }
> +  return value;
>  }

OK, you can always safely use [-1] because addrp will have been
increased at least by one by __libdw_get_uleb128. value is uint64_t so
the left shift will always be fine. Negating 1 shifted left one beyond
what we believe is the largest but (7 * nbytes) will negate But nbytes
can be so large that shifting left will go beyond the end, which we
don't want because then we get zero. OK

But what I don't fully understand is how this works with a "padded"
leb128, won't we possibly forget to negate in that case?

>  static inline int64_t
>  __libdw_get_sleb128_unchecked (const unsigned char **addrp)
>  {
> -  int64_t acc = 0;
> -
> -  /* Unroll the first step to help the compiler optimize
> -     for the common single-byte case.  */
> -  get_sleb128_step (acc, *addrp, 0);
> -
> -  /* Subtract one step, so we don't shift into sign bit.  */
> -  const size_t max = len_leb128 (int64_t) - 1;
> -  for (size_t i = 1; i < max; ++i)
> -    get_sleb128_step (acc, *addrp, i);
> -  /* Other implementations set VALUE to INT_MAX in this
> -     case.  So we better do this as well.  */
> -  return INT64_MAX;
> +  const unsigned char *start = *addrp;
> +  uint64_t value = __libdw_get_uleb128_unchecked (addrp);
> +  if (unlikely (value == UINT64_MAX))
> +    return INT64_MAX;
> +  if (((*addrp)[-1] & 0x40) != 0)
> +    {
> +      size_t nbytes = *addrp - start;
> +      if (7 * nbytes < 8 * sizeof (value))
> +     value |= -(((typeof (value)) 1) << 7 * nbytes);
> +    }
> +  return value;
>  }

Same comments as above.

>  #define get_sleb128(var, addr, end) ((var) = __libdw_get_sleb128 (&(addr), 
> end))
> diff --git a/tests/ChangeLog b/tests/ChangeLog
> index aa68ffd3..5f075df8 100644
> --- a/tests/ChangeLog
> +++ b/tests/ChangeLog
> @@ -1,3 +1,10 @@
> +2020-10-22  Tom Tromey  <[email protected]>
> +
> +     PR26773
> +     * Makefile.am (check_PROGRAMS, TESTS): Add leb128.
> +     (leb128_LDADD): New variable.
> +     * leb128.c: New file.
> +
>  2020-10-20  Frank Ch. Eigler  <[email protected]>
>  
>       PR26756: more prometheus metrics
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 9d0707da..568a9f8d 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -1,6 +1,6 @@
>  ## Process this file with automake to create Makefile.in
>  ##
> -## Copyright (C) 1996-2019 Red Hat, Inc.
> +## Copyright (C) 1996-2020 Red Hat, Inc.

You may claim your own copyright.

>  ## This file is part of elfutils.
>  ##
>  ## This file is free software; you can redistribute it and/or modify
> @@ -63,7 +63,7 @@ check_PROGRAMS = arextract arsymtest newfile saridx 
> scnnames sectiondump \
>                 all-dwarf-ranges unit-info next_cfi \
>                 elfcopy addsections xlate_notes elfrdwrnop \
>                 dwelf_elf_e_machine_string \
> -               getphdrnum
> +               getphdrnum leb128
>  
>  asm_TESTS = asm-tst1 asm-tst2 asm-tst3 asm-tst4 asm-tst5 \
>           asm-tst6 asm-tst7 asm-tst8 asm-tst9
> @@ -185,7 +185,8 @@ TESTS = run-arextract.sh run-arsymtest.sh run-ar.sh 
> newfile test-nlist \
>       run-elfclassify.sh run-elfclassify-self.sh \
>       run-disasm-riscv64.sh \
>       run-pt_gnu_prop-tests.sh \
> -     run-getphdrnum.sh run-test-includes.sh
> +     run-getphdrnum.sh run-test-includes.sh \
> +     leb128
>  
>  if !BIARCH
>  export ELFUTILS_DISABLE_BIARCH = 1
> @@ -694,6 +695,7 @@ xlate_notes_LDADD = $(libelf)
>  elfrdwrnop_LDADD = $(libelf)
>  dwelf_elf_e_machine_string_LDADD = $(libelf) $(libdw)
>  getphdrnum_LDADD = $(libelf) $(libdw)
> +leb128_LDADD = $(libelf) $(libdw)

Boilerplate looks good.

>  # We want to test the libelf header against the system elf.h header.
>  # Don't include any -I CPPFLAGS. Except when we install our own elf.h.
> diff --git a/tests/leb128.c b/tests/leb128.c
> new file mode 100644
> index 00000000..856a2b89
> --- /dev/null
> +++ b/tests/leb128.c
> @@ -0,0 +1,161 @@
> +/* Test program for leb128
> +   Copyright (C) 2020 Tom Tromey
> +   This file is part of elfutils.
> +
> +   This file is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3 of the License, or
> +   (at your option) any later version.
> +
> +   elfutils is distributed in the hope that it will be useful, but
> +   WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +   GNU General Public License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
> +
> +#include <config.h>
> +#include <stddef.h>
> +#include <stdbool.h>
> +#include <libdw.h>
> +#include <libdwfl.h>
> +#include "../libdwelf/libdwelf.h"
> +#include "../libdw/libdwP.h"
> +#include "../libdwfl/libdwflP.h"
> +#include "../libdw/memory-access.h"

Slightly surprised you need almost all internal headers, but OK.

> +#define OK 0
> +#define FAIL 1
> +
> +static const unsigned char v0[] = { 0x0 };
> +static const unsigned char v23[] = { 23 };
> +static const unsigned char vm_2[] = { 0x7e };
> +static const unsigned char v128[] = { 0x80, 0x01 };
> +static const unsigned char vm_128[] = { 0x80, 0x7f };
> +/* This is pathological but nothing seems to prevent it, and maybe
> +   it would be useful for a (hypothetical) LEB128 relocation.  */
> +static const unsigned char v0000[] =
> +  {
> +    0x80, 0x80, 0x80, 0x80, 0x80,
> +    0x80, 0x80, 0x80, 0x80, 0x80,
> +    0x80, 0x80, 0x80, 0x80, 0x80,
> +    0x0
> +  };
> +static const unsigned char vhuge[] =
> +  {
> +    0xff, 0xff, 0xff, 0xff, 0xff,
> +    0xff, 0xff, 0xff, 0xff, 0x0
> +  };
> +static const unsigned char most_positive[] =
> +  {
> +    0xff, 0xff, 0xff, 0xff, 0xff,
> +    0xff, 0xff, 0xff, 0x3f
> +  };
> +static const unsigned char most_negative[] =
> +  {
> +    0x80, 0x80, 0x80, 0x80, 0x80,
> +    0x80, 0x80, 0x80, 0x40
> +  };
> +static const unsigned char minus_one[] =
> +  {
> +    0xff, 0xff, 0xff, 0xff, 0xff,
> +    0xff, 0xff, 0xff, 0x7f
> +  };
> +static const unsigned char overflow[] =
> +  {
> +    0xff, 0xff, 0xff, 0xff, 0xff,
> +    0xff, 0xff, 0xff, 0xbf, 0x01
> +  };
> +
> +static int
> +test_one_sleb (const unsigned char *data, size_t len, int64_t expect)
> +{
> +  int64_t value;
> +  const unsigned char *p;
> +
> +  p = data;
> +  get_sleb128 (value, p, p + len);
> +  if (value != expect || p != data + len)
> +    return FAIL;
> +
> +  p = data;
> +  get_sleb128_unchecked (value, p);
> +  if (value != expect || p != data + len)
> +    return FAIL;
> +
> +  return OK;
> +}
> +
> +static int
> +test_sleb (void)
> +{
> +#define TEST(ARRAY, V)                                     \
> +  if (test_one_sleb (ARRAY, sizeof (ARRAY), V) != OK) \
> +    return FAIL;
> +
> +  TEST (v0, 0);
> +  TEST (v23, 23);
> +  TEST (vm_2, -2);
> +  TEST (v128, 128);
> +  TEST (vm_128, -128);
> +  TEST (v0000, 0);
> +  TEST (vhuge, 9223372036854775807ll);
> +  TEST (most_positive, 4611686018427387903ll);
> +  TEST (most_negative, -4611686018427387904ll);
> +  TEST (minus_one, -1);
> +  TEST (overflow, INT64_MAX);
> +
> +#undef TEST
> +
> +  return OK;
> +}
> +
> +static int
> +test_one_uleb (const unsigned char *data, size_t len, uint64_t expect)
> +{
> +  uint64_t value;
> +  const unsigned char *p;
> +
> +  p = data;
> +  get_uleb128 (value, p, p + len);
> +  if (value != expect || p != data + len)
> +    return FAIL;
> +
> +  p = data;
> +  get_uleb128_unchecked (value, p);
> +  if (value != expect || p != data + len)
> +    return FAIL;
> +
> +  return OK;
> +}
> +
> +static int
> +test_uleb (void)
> +{
> +#define TEST(ARRAY, V)                                     \
> +  if (test_one_uleb (ARRAY, sizeof (ARRAY), V) != OK) \
> +    return FAIL;
> +
> +  TEST (v0, 0);
> +  TEST (v23, 23);
> +  TEST (vm_2, 126);
> +  TEST (v128, 128);
> +  TEST (vm_128, 16256);
> +  TEST (v0000, 0);
> +  TEST (vhuge, 9223372036854775807ull);
> +  TEST (most_positive, 4611686018427387903ull);
> +  TEST (most_negative, 4611686018427387904ull);
> +  TEST (minus_one, 9223372036854775807ull);
> +  TEST (overflow, UINT64_MAX);
> +
> +#undef TEST
> +
> +  return OK;
> +}
> +
> +int
> +main (void)
> +{
> +  return test_sleb () || test_uleb ();
> +}

Very nice to have tests for this.
Maybe we can have some extra "padding" variants for 
most_postitive/most_negative?

Thanks,

Mark

Re: [PATCH] Fix leb128 reading

Reply via email to