Repository: kudu
Updated Branches:
  refs/heads/master 726dedacd -> 8489fa816


KUDU-2427: fix glog symbolization in certain ASLR environments

On Ubuntu 18.04, glog is unable to symbolize the stack frames belonging to
a unit test binary itself, leaving them as "(unknown)". This breaks various
unit tests that check the contents of those frames.

There hasn't been a new glog release in some time, but this upstream patch
fixes the problem.

Change-Id: Iba6f715123ddf41ba67b93860caf7041b8e137c4
Reviewed-on: http://gerrit.cloudera.org:8080/10431
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <t...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/8489fa81
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/8489fa81
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/8489fa81

Branch: refs/heads/master
Commit: 8489fa8168432461dcb3b3f3aa13a818426ed4d9
Parents: 726deda
Author: Adar Dembo <a...@cloudera.com>
Authored: Fri May 11 16:38:35 2018 -0700
Committer: Adar Dembo <a...@cloudera.com>
Committed: Thu May 17 18:07:31 2018 +0000

----------------------------------------------------------------------
 thirdparty/download-thirdparty.sh               |   3 +-
 thirdparty/patches/glog-fix-symbolization.patch | 212 +++++++++++++++++++
 2 files changed, 214 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/8489fa81/thirdparty/download-thirdparty.sh
----------------------------------------------------------------------
diff --git a/thirdparty/download-thirdparty.sh 
b/thirdparty/download-thirdparty.sh
index 5ee0ba5..ef6e2f5 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -136,13 +136,14 @@ fetch_and_patch() {
 mkdir -p $TP_SOURCE_DIR
 cd $TP_SOURCE_DIR
 
-GLOG_PATCHLEVEL=2
+GLOG_PATCHLEVEL=3
 fetch_and_patch \
  glog-${GLOG_VERSION}.tar.gz \
  $GLOG_SOURCE \
  $GLOG_PATCHLEVEL \
  "patch -p0 < $TP_DIR/patches/glog-issue-198-fix-unused-warnings.patch" \
  "patch -p0 < $TP_DIR/patches/glog-issue-54-dont-build-tests.patch" \
+ "patch -p1 < $TP_DIR/patches/glog-fix-symbolization.patch" \
  "autoreconf -fvi"
 
 GMOCK_PATCHLEVEL=0

http://git-wip-us.apache.org/repos/asf/kudu/blob/8489fa81/thirdparty/patches/glog-fix-symbolization.patch
----------------------------------------------------------------------
diff --git a/thirdparty/patches/glog-fix-symbolization.patch 
b/thirdparty/patches/glog-fix-symbolization.patch
new file mode 100644
index 0000000..1f11de9
--- /dev/null
+++ b/thirdparty/patches/glog-fix-symbolization.patch
@@ -0,0 +1,212 @@
+commit c4d37a7
+Author: Peter Collingbourne <p...@google.com>
+Date:   Thu Nov 2 16:53:21 2017 -0700
+
+    Compute base addresses from program headers while reading /proc/self/maps.
+    
+    We previously had logic to compute the base address from program
+    headers as part of symbolization. The problem is that we need a correct
+    base address earlier in order to adjust a PC into the image's address
+    space, as these addresses can appear in unsymbolized output.
+    
+    There was previously an assumption that only the mapping that
+    was lowest in the address space did not need to be adjusted. This
+    assumption is not guaranteed (for example, the kernel may choose to
+    map an ET_DYN lowest) and in fact turned out to be wrong in binaries
+    linked with lld because the first mapping is read-only.
+    
+    The solution is to move the program header reading logic into the
+    code that reads /proc/self/maps.
+    
+    There is a change in semantics for clients that install a callback
+    using the InstallSymbolizeOpenObjectFileCallback function. Any such
+    clients will need to return a correct base address from the callback
+    by reading program headers using code similar to that in the function
+    OpenObjectFileContainingPcAndGetStartAddress.
+
+    (Modified by Adar to remove changes to Makefile.am)
+
+diff --git a/src/symbolize.cc b/src/symbolize.cc
+index 953f1db..98a754f 100644
+--- a/src/symbolize.cc
++++ b/src/symbolize.cc
+@@ -56,6 +56,8 @@
+ 
+ #if defined(HAVE_SYMBOLIZE)
+ 
++#include <string.h>
++
+ #include <limits>
+ 
+ #include "symbolize.h"
+@@ -325,41 +327,17 @@ FindSymbol(uint64_t pc, const int fd, char *out, int 
out_size,
+ // both regular and dynamic symbol tables if necessary.  On success,
+ // write the symbol name to "out" and return true.  Otherwise, return
+ // false.
+-static bool GetSymbolFromObjectFile(const int fd, uint64_t pc,
+-                                    char *out, int out_size,
+-                                    uint64_t map_base_address) {
++static bool GetSymbolFromObjectFile(const int fd,
++                                    uint64_t pc,
++                                    char* out,
++                                    int out_size,
++                                    uint64_t base_address) {
+   // Read the ELF header.
+   ElfW(Ehdr) elf_header;
+   if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
+     return false;
+   }
+ 
+-  uint64_t symbol_offset = 0;
+-  if (elf_header.e_type == ET_DYN) {  // DSO needs offset adjustment.
+-    ElfW(Phdr) phdr;
+-    // We need to find the PT_LOAD segment corresponding to the read-execute
+-    // file mapping in order to correctly perform the offset adjustment.
+-    for (unsigned i = 0; i != elf_header.e_phnum; ++i) {
+-      if (!ReadFromOffsetExact(fd, &phdr, sizeof(phdr),
+-                               elf_header.e_phoff + i * sizeof(phdr)))
+-        return false;
+-      if (phdr.p_type == PT_LOAD &&
+-          (phdr.p_flags & (PF_R | PF_X)) == (PF_R | PF_X)) {
+-        // Find the mapped address corresponding to virtual address zero. We 
do
+-        // this by first adding p_offset. This gives us the mapped address of
+-        // the start of the segment, or in other words the mapped address
+-        // corresponding to the virtual address of the segment. (Note that 
this
+-        // is distinct from the start address, as p_offset is not guaranteed 
to
+-        // be page aligned.) We then subtract p_vaddr, which takes us to 
virtual
+-        // address zero.
+-        symbol_offset = map_base_address + phdr.p_offset - phdr.p_vaddr;
+-        break;
+-      }
+-    }
+-    if (symbol_offset == 0)
+-      return false;
+-  }
+-
+   ElfW(Shdr) symtab, strtab;
+ 
+   // Consult a regular symbol table first.
+@@ -369,8 +347,7 @@ static bool GetSymbolFromObjectFile(const int fd, uint64_t 
pc,
+                              symtab.sh_link * sizeof(symtab))) {
+       return false;
+     }
+-    if (FindSymbol(pc, fd, out, out_size, symbol_offset,
+-                   &strtab, &symtab)) {
++    if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) {
+       return true;  // Found the symbol in a regular symbol table.
+     }
+   }
+@@ -382,8 +359,7 @@ static bool GetSymbolFromObjectFile(const int fd, uint64_t 
pc,
+                              symtab.sh_link * sizeof(symtab))) {
+       return false;
+     }
+-    if (FindSymbol(pc, fd, out, out_size, symbol_offset,
+-                   &strtab, &symtab)) {
++    if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) {
+       return true;  // Found the symbol in a dynamic symbol table.
+     }
+   }
+@@ -532,7 +508,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
+                                              int out_file_name_size) {
+   int object_fd;
+ 
+-  // Open /proc/self/maps.
+   int maps_fd;
+   NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY));
+   FileDescriptor wrapped_maps_fd(maps_fd);
+@@ -540,6 +515,13 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
+     return -1;
+   }
+ 
++  int mem_fd;
++  NO_INTR(mem_fd = open("/proc/self/mem", O_RDONLY));
++  FileDescriptor wrapped_mem_fd(mem_fd);
++  if (wrapped_mem_fd.get() < 0) {
++    return -1;
++  }
++
+   // Iterate over maps and look for the map containing the pc.  Then
+   // look into the symbol tables inside.
+   char buf[1024];  // Big enough for line of sane /proc/self/maps
+@@ -575,11 +557,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
+     }
+     ++cursor;  // Skip ' '.
+ 
+-    // Check start and end addresses.
+-    if (!(start_address <= pc && pc < end_address)) {
+-      continue;  // We skip this map.  PC isn't in this map.
+-    }
+-
+     // Read flags.  Skip flags until we encounter a space or eol.
+     const char * const flags_start = cursor;
+     while (cursor < eol && *cursor != ' ') {
+@@ -590,6 +567,49 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
+       return -1;  // Malformed line.
+     }
+ 
++    // Determine the base address by reading ELF headers in process memory.
++    ElfW(Ehdr) ehdr;
++    // Skip non-readable maps.
++    if (flags_start[0] == 'r' &&
++        ReadFromOffsetExact(mem_fd, &ehdr, sizeof(ElfW(Ehdr)), start_address) 
&&
++        memcmp(ehdr.e_ident, ELFMAG, SELFMAG) == 0) {
++      switch (ehdr.e_type) {
++        case ET_EXEC:
++          base_address = 0;
++          break;
++        case ET_DYN:
++          // Find the segment containing file offset 0. This will correspond
++          // to the ELF header that we just read. Normally this will have
++          // virtual address 0, but this is not guaranteed. We must subtract
++          // the virtual address from the address where the ELF header was
++          // mapped to get the base address.
++          //
++          // If we fail to find a segment for file offset 0, use the address
++          // of the ELF header as the base address.
++          base_address = start_address;
++          for (unsigned i = 0; i != ehdr.e_phnum; ++i) {
++            ElfW(Phdr) phdr;
++            if (ReadFromOffsetExact(
++                    mem_fd, &phdr, sizeof(phdr),
++                    start_address + ehdr.e_phoff + i * sizeof(phdr)) &&
++                phdr.p_type == PT_LOAD && phdr.p_offset == 0) {
++              base_address = start_address - phdr.p_vaddr;
++              break;
++            }
++          }
++          break;
++        default:
++          // ET_REL or ET_CORE. These aren't directly executable, so they 
don't
++          // affect the base address.
++          break;
++      }
++    }
++
++    // Check start and end addresses.
++    if (!(start_address <= pc && pc < end_address)) {
++      continue;  // We skip this map.  PC isn't in this map.
++    }
++
+    // Check flags.  We are only interested in "r*x" maps.
+     if (flags_start[0] != 'r' || flags_start[2] != 'x') {
+       continue;  // We skip this map.
+@@ -604,19 +624,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
+     }
+     ++cursor;  // Skip ' '.
+ 
+-    // Don't subtract 'start_address' from the first entry:
+-    // * If a binary is compiled w/o -pie, then the first entry in
+-    //   process maps is likely the binary itself (all dynamic libs
+-    //   are mapped higher in address space). For such a binary,
+-    //   instruction offset in binary coincides with the actual
+-    //   instruction address in virtual memory (as code section
+-    //   is mapped to a fixed memory range).
+-    // * If a binary is compiled with -pie, all the modules are
+-    //   mapped high at address space (in particular, higher than
+-    //   shadow memory of the tool), so the module can't be the
+-    //   first entry.
+-    base_address = ((num_maps == 1) ? 0U : start_address) - file_offset;
+-
+     // Skip to file name.  "cursor" now points to dev.  We need to
+     // skip at least two spaces for dev and inode.
+     int num_spaces = 0;

Reply via email to