Repository: kudu Updated Branches: refs/heads/master 726dedacd -> 8489fa816
KUDU-2427: fix glog symbolization in certain ASLR environments On Ubuntu 18.04, glog is unable to symbolize the stack frames belonging to a unit test binary itself, leaving them as "(unknown)". This breaks various unit tests that check the contents of those frames. There hasn't been a new glog release in some time, but this upstream patch fixes the problem. Change-Id: Iba6f715123ddf41ba67b93860caf7041b8e137c4 Reviewed-on: http://gerrit.cloudera.org:8080/10431 Tested-by: Kudu Jenkins Reviewed-by: Todd Lipcon <t...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/8489fa81 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/8489fa81 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/8489fa81 Branch: refs/heads/master Commit: 8489fa8168432461dcb3b3f3aa13a818426ed4d9 Parents: 726deda Author: Adar Dembo <a...@cloudera.com> Authored: Fri May 11 16:38:35 2018 -0700 Committer: Adar Dembo <a...@cloudera.com> Committed: Thu May 17 18:07:31 2018 +0000 ---------------------------------------------------------------------- thirdparty/download-thirdparty.sh | 3 +- thirdparty/patches/glog-fix-symbolization.patch | 212 +++++++++++++++++++ 2 files changed, 214 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/8489fa81/thirdparty/download-thirdparty.sh ---------------------------------------------------------------------- diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh index 5ee0ba5..ef6e2f5 100755 --- a/thirdparty/download-thirdparty.sh +++ b/thirdparty/download-thirdparty.sh @@ -136,13 +136,14 @@ fetch_and_patch() { mkdir -p $TP_SOURCE_DIR cd $TP_SOURCE_DIR -GLOG_PATCHLEVEL=2 +GLOG_PATCHLEVEL=3 fetch_and_patch \ glog-${GLOG_VERSION}.tar.gz \ $GLOG_SOURCE \ $GLOG_PATCHLEVEL \ "patch -p0 < $TP_DIR/patches/glog-issue-198-fix-unused-warnings.patch" \ "patch -p0 < $TP_DIR/patches/glog-issue-54-dont-build-tests.patch" \ + "patch -p1 < $TP_DIR/patches/glog-fix-symbolization.patch" \ "autoreconf -fvi" GMOCK_PATCHLEVEL=0 http://git-wip-us.apache.org/repos/asf/kudu/blob/8489fa81/thirdparty/patches/glog-fix-symbolization.patch ---------------------------------------------------------------------- diff --git a/thirdparty/patches/glog-fix-symbolization.patch b/thirdparty/patches/glog-fix-symbolization.patch new file mode 100644 index 0000000..1f11de9 --- /dev/null +++ b/thirdparty/patches/glog-fix-symbolization.patch @@ -0,0 +1,212 @@ +commit c4d37a7 +Author: Peter Collingbourne <p...@google.com> +Date: Thu Nov 2 16:53:21 2017 -0700 + + Compute base addresses from program headers while reading /proc/self/maps. + + We previously had logic to compute the base address from program + headers as part of symbolization. The problem is that we need a correct + base address earlier in order to adjust a PC into the image's address + space, as these addresses can appear in unsymbolized output. + + There was previously an assumption that only the mapping that + was lowest in the address space did not need to be adjusted. This + assumption is not guaranteed (for example, the kernel may choose to + map an ET_DYN lowest) and in fact turned out to be wrong in binaries + linked with lld because the first mapping is read-only. + + The solution is to move the program header reading logic into the + code that reads /proc/self/maps. + + There is a change in semantics for clients that install a callback + using the InstallSymbolizeOpenObjectFileCallback function. Any such + clients will need to return a correct base address from the callback + by reading program headers using code similar to that in the function + OpenObjectFileContainingPcAndGetStartAddress. + + (Modified by Adar to remove changes to Makefile.am) + +diff --git a/src/symbolize.cc b/src/symbolize.cc +index 953f1db..98a754f 100644 +--- a/src/symbolize.cc ++++ b/src/symbolize.cc +@@ -56,6 +56,8 @@ + + #if defined(HAVE_SYMBOLIZE) + ++#include <string.h> ++ + #include <limits> + + #include "symbolize.h" +@@ -325,41 +327,17 @@ FindSymbol(uint64_t pc, const int fd, char *out, int out_size, + // both regular and dynamic symbol tables if necessary. On success, + // write the symbol name to "out" and return true. Otherwise, return + // false. +-static bool GetSymbolFromObjectFile(const int fd, uint64_t pc, +- char *out, int out_size, +- uint64_t map_base_address) { ++static bool GetSymbolFromObjectFile(const int fd, ++ uint64_t pc, ++ char* out, ++ int out_size, ++ uint64_t base_address) { + // Read the ELF header. + ElfW(Ehdr) elf_header; + if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { + return false; + } + +- uint64_t symbol_offset = 0; +- if (elf_header.e_type == ET_DYN) { // DSO needs offset adjustment. +- ElfW(Phdr) phdr; +- // We need to find the PT_LOAD segment corresponding to the read-execute +- // file mapping in order to correctly perform the offset adjustment. +- for (unsigned i = 0; i != elf_header.e_phnum; ++i) { +- if (!ReadFromOffsetExact(fd, &phdr, sizeof(phdr), +- elf_header.e_phoff + i * sizeof(phdr))) +- return false; +- if (phdr.p_type == PT_LOAD && +- (phdr.p_flags & (PF_R | PF_X)) == (PF_R | PF_X)) { +- // Find the mapped address corresponding to virtual address zero. We do +- // this by first adding p_offset. This gives us the mapped address of +- // the start of the segment, or in other words the mapped address +- // corresponding to the virtual address of the segment. (Note that this +- // is distinct from the start address, as p_offset is not guaranteed to +- // be page aligned.) We then subtract p_vaddr, which takes us to virtual +- // address zero. +- symbol_offset = map_base_address + phdr.p_offset - phdr.p_vaddr; +- break; +- } +- } +- if (symbol_offset == 0) +- return false; +- } +- + ElfW(Shdr) symtab, strtab; + + // Consult a regular symbol table first. +@@ -369,8 +347,7 @@ static bool GetSymbolFromObjectFile(const int fd, uint64_t pc, + symtab.sh_link * sizeof(symtab))) { + return false; + } +- if (FindSymbol(pc, fd, out, out_size, symbol_offset, +- &strtab, &symtab)) { ++ if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) { + return true; // Found the symbol in a regular symbol table. + } + } +@@ -382,8 +359,7 @@ static bool GetSymbolFromObjectFile(const int fd, uint64_t pc, + symtab.sh_link * sizeof(symtab))) { + return false; + } +- if (FindSymbol(pc, fd, out, out_size, symbol_offset, +- &strtab, &symtab)) { ++ if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) { + return true; // Found the symbol in a dynamic symbol table. + } + } +@@ -532,7 +508,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, + int out_file_name_size) { + int object_fd; + +- // Open /proc/self/maps. + int maps_fd; + NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY)); + FileDescriptor wrapped_maps_fd(maps_fd); +@@ -540,6 +515,13 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, + return -1; + } + ++ int mem_fd; ++ NO_INTR(mem_fd = open("/proc/self/mem", O_RDONLY)); ++ FileDescriptor wrapped_mem_fd(mem_fd); ++ if (wrapped_mem_fd.get() < 0) { ++ return -1; ++ } ++ + // Iterate over maps and look for the map containing the pc. Then + // look into the symbol tables inside. + char buf[1024]; // Big enough for line of sane /proc/self/maps +@@ -575,11 +557,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, + } + ++cursor; // Skip ' '. + +- // Check start and end addresses. +- if (!(start_address <= pc && pc < end_address)) { +- continue; // We skip this map. PC isn't in this map. +- } +- + // Read flags. Skip flags until we encounter a space or eol. + const char * const flags_start = cursor; + while (cursor < eol && *cursor != ' ') { +@@ -590,6 +567,49 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, + return -1; // Malformed line. + } + ++ // Determine the base address by reading ELF headers in process memory. ++ ElfW(Ehdr) ehdr; ++ // Skip non-readable maps. ++ if (flags_start[0] == 'r' && ++ ReadFromOffsetExact(mem_fd, &ehdr, sizeof(ElfW(Ehdr)), start_address) && ++ memcmp(ehdr.e_ident, ELFMAG, SELFMAG) == 0) { ++ switch (ehdr.e_type) { ++ case ET_EXEC: ++ base_address = 0; ++ break; ++ case ET_DYN: ++ // Find the segment containing file offset 0. This will correspond ++ // to the ELF header that we just read. Normally this will have ++ // virtual address 0, but this is not guaranteed. We must subtract ++ // the virtual address from the address where the ELF header was ++ // mapped to get the base address. ++ // ++ // If we fail to find a segment for file offset 0, use the address ++ // of the ELF header as the base address. ++ base_address = start_address; ++ for (unsigned i = 0; i != ehdr.e_phnum; ++i) { ++ ElfW(Phdr) phdr; ++ if (ReadFromOffsetExact( ++ mem_fd, &phdr, sizeof(phdr), ++ start_address + ehdr.e_phoff + i * sizeof(phdr)) && ++ phdr.p_type == PT_LOAD && phdr.p_offset == 0) { ++ base_address = start_address - phdr.p_vaddr; ++ break; ++ } ++ } ++ break; ++ default: ++ // ET_REL or ET_CORE. These aren't directly executable, so they don't ++ // affect the base address. ++ break; ++ } ++ } ++ ++ // Check start and end addresses. ++ if (!(start_address <= pc && pc < end_address)) { ++ continue; // We skip this map. PC isn't in this map. ++ } ++ + // Check flags. We are only interested in "r*x" maps. + if (flags_start[0] != 'r' || flags_start[2] != 'x') { + continue; // We skip this map. +@@ -604,19 +624,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, + } + ++cursor; // Skip ' '. + +- // Don't subtract 'start_address' from the first entry: +- // * If a binary is compiled w/o -pie, then the first entry in +- // process maps is likely the binary itself (all dynamic libs +- // are mapped higher in address space). For such a binary, +- // instruction offset in binary coincides with the actual +- // instruction address in virtual memory (as code section +- // is mapped to a fixed memory range). +- // * If a binary is compiled with -pie, all the modules are +- // mapped high at address space (in particular, higher than +- // shadow memory of the tool), so the module can't be the +- // first entry. +- base_address = ((num_maps == 1) ? 0U : start_address) - file_offset; +- + // Skip to file name. "cursor" now points to dev. We need to + // skip at least two spaces for dev and inode. + int num_spaces = 0;