kwk updated this revision to Diff 222073.
kwk added a comment.

- Cleanup


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67390/new/

https://reviews.llvm.org/D67390

Files:
  lldb/lit/Modules/ELF/Inputs/load-from-dynsym-alone.c
  lldb/lit/Modules/ELF/Inputs/load-symtab-and-dynsym.c
  lldb/lit/Modules/ELF/load-from-dynsym-alone.test
  lldb/lit/Modules/ELF/load-symtab-and-dynsym.test
  lldb/lit/helper/toolchain.py
  lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp
  lldb/source/Plugins/ObjectFile/ELF/ELFHeader.h
  lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
  lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h

Index: lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
===================================================================
--- lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
+++ lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
@@ -12,6 +12,7 @@
 #include <stdint.h>
 
 #include <vector>
+#include <unordered_set>
 
 #include "lldb/Symbol/ObjectFile.h"
 #include "lldb/Utility/ArchSpec.h"
@@ -55,7 +56,7 @@
 /// This class provides a generic ELF (32/64 bit) reader plugin implementing
 /// the ObjectFile protocol.
 class ObjectFileELF : public lldb_private::ObjectFile {
-public:
+public:  
   // Static Functions
   static void Initialize();
 
@@ -184,6 +185,8 @@
 
   typedef std::map<lldb::addr_t, lldb_private::AddressClass>
       FileAddressToAddressClassMap;
+  
+  typedef std::unordered_set<elf::NamedELFSymbol> UniqueElfSymbolColl;
 
   /// Version of this reader common to all plugins based on this class.
   static const uint32_t m_plugin_version = 1;
@@ -278,7 +281,8 @@
   /// will parse the symbols only once.  Returns the number of symbols parsed.
   unsigned ParseSymbolTable(lldb_private::Symtab *symbol_table,
                             lldb::user_id_t start_id,
-                            lldb_private::Section *symtab);
+                            lldb_private::Section *symtab,
+                            UniqueElfSymbolColl &unique_elf_symbols);
 
   /// Helper routine for ParseSymbolTable().
   unsigned ParseSymbols(lldb_private::Symtab *symbol_table,
@@ -286,7 +290,8 @@
                         lldb_private::SectionList *section_list,
                         const size_t num_symbols,
                         const lldb_private::DataExtractor &symtab_data,
-                        const lldb_private::DataExtractor &strtab_data);
+                        const lldb_private::DataExtractor &strtab_data,
+                        UniqueElfSymbolColl &unique_elf_symbols);
 
   /// Scans the relocation entries and adds a set of artificial symbols to the
   /// given symbol table for each PLT slot.  Returns the number of symbols
Index: lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
===================================================================
--- lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -1871,7 +1871,8 @@
                                      SectionList *section_list,
                                      const size_t num_symbols,
                                      const DataExtractor &symtab_data,
-                                     const DataExtractor &strtab_data) {
+                                     const DataExtractor &strtab_data,
+                                     UniqueElfSymbolColl &unique_elf_symbols) {
   ELFSymbol symbol;
   lldb::offset_t offset = 0;
 
@@ -2196,20 +2197,28 @@
         symbol_size_valid,              // Symbol size is valid
         has_suffix,                     // Contains linker annotations?
         flags);                         // Symbol flags.
-    symtab->AddSymbol(dc_symbol);
+
+    NamedELFSymbol needle(symbol);
+    needle.st_name_string = ConstString(symbol_bare);
+    if (unique_elf_symbols.find(needle) == unique_elf_symbols.end()) {
+      symtab->AddSymbol(dc_symbol);
+      unique_elf_symbols.insert(needle);
+    }
   }
   return i;
 }
 
-unsigned ObjectFileELF::ParseSymbolTable(Symtab *symbol_table,
-                                         user_id_t start_id,
-                                         lldb_private::Section *symtab) {
+unsigned
+ObjectFileELF::ParseSymbolTable(Symtab *symbol_table, user_id_t start_id,
+                                lldb_private::Section *symtab,
+                                UniqueElfSymbolColl &unique_elf_symbols) {
   if (symtab->GetObjectFile() != this) {
     // If the symbol table section is owned by a different object file, have it
     // do the parsing.
     ObjectFileELF *obj_file_elf =
         static_cast<ObjectFileELF *>(symtab->GetObjectFile());
-    return obj_file_elf->ParseSymbolTable(symbol_table, start_id, symtab);
+    return obj_file_elf->ParseSymbolTable(symbol_table, start_id, symtab,
+                                          unique_elf_symbols);
   }
 
   // Get section list for this object file.
@@ -2237,7 +2246,7 @@
       size_t num_symbols = symtab_data.GetByteSize() / symtab_hdr->sh_entsize;
 
       return ParseSymbols(symbol_table, start_id, section_list, num_symbols,
-                          symtab_data, strtab_data);
+                          symtab_data, strtab_data, unique_elf_symbols);
     }
   }
 
@@ -2405,7 +2414,7 @@
         true,           // Size is valid
         false,          // Contains linker annotations?
         0);             // Symbol flags.
-
+    
     symbol_table->AddSymbol(jump_symbol);
   }
 
@@ -2635,6 +2644,8 @@
     return module_obj_file->GetSymtab();
 
   if (m_symtab_up == nullptr) {
+    // A unique set of ELF symbols added to the symtab
+    UniqueElfSymbolColl unique_elf_symbols({});
     SectionList *section_list = module_sp->GetSectionList();
     if (!section_list)
       return nullptr;
@@ -2644,22 +2655,31 @@
 
     // Sharable objects and dynamic executables usually have 2 distinct symbol
     // tables, one named ".symtab", and the other ".dynsym". The dynsym is a
-    // smaller version of the symtab that only contains global symbols. The
-    // information found in the dynsym is therefore also found in the symtab,
-    // while the reverse is not necessarily true.
+    // smaller version of the symtab that only contains global symbols.
+    // Information in the dynsym section is *usually* also found in the symtab,
+    // but this is not required as symtab entries can be removed after linking.
+    // The minidebuginfo format makes use of this facility to create smaller
+    // symbol tables.
     Section *symtab =
         section_list->FindSectionByType(eSectionTypeELFSymbolTable, true).get();
-    if (!symtab) {
-      // The symtab section is non-allocable and can be stripped, so if it
-      // doesn't exist then use the dynsym section which should always be
-      // there.
-      symtab =
-          section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
-              .get();
-    }
     if (symtab) {
       m_symtab_up.reset(new Symtab(symtab->GetObjectFile()));
-      symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, symtab);
+      symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, symtab,
+                                    unique_elf_symbols);
+    }
+
+    // The symtab section is non-allocable and can be stripped, while the dynsym
+    // section which should always be always be there. If both exist we load
+    // both to support the minidebuginfo case. Otherwise we just load the dynsym
+    // section.
+    Section *dynsym =
+        section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
+            .get();
+    if (dynsym) {
+      if (!m_symtab_up)
+        m_symtab_up.reset(new Symtab(dynsym->GetObjectFile()));
+      symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, dynsym,
+                                    unique_elf_symbols);
     }
 
     // DT_JMPREL
Index: lldb/source/Plugins/ObjectFile/ELF/ELFHeader.h
===================================================================
--- lldb/source/Plugins/ObjectFile/ELF/ELFHeader.h
+++ lldb/source/Plugins/ObjectFile/ELF/ELFHeader.h
@@ -24,6 +24,8 @@
 
 #include "lldb/lldb-enumerations.h"
 #include "lldb/lldb-types.h"
+#include "lldb/Utility/ConstString.h"
+#include "llvm/ADT/Hashing.h"
 
 namespace lldb_private {
 class DataExtractor;
@@ -230,6 +232,8 @@
   elf_half st_shndx;      ///< Section to which this symbol applies.
 
   ELFSymbol();
+  
+  ELFSymbol(const ELFSymbol &other);
 
   /// Returns the binding attribute of the st_info member.
   unsigned char getBinding() const { return st_info >> 4; }
@@ -269,6 +273,26 @@
   void Dump(lldb_private::Stream *s, uint32_t idx,
             const lldb_private::DataExtractor *strtab_data,
             const lldb_private::SectionList *section_list);
+  
+  bool operator==(const ELFSymbol &rhs) const {
+    return st_info == rhs.st_info && st_name == rhs.st_name &&
+           st_size == rhs.st_size && st_other == rhs.st_other &&
+           st_shndx == rhs.st_shndx && st_value == rhs.st_value;
+  }
+};
+
+// A NamedELFSymbol is an ELFSymbol that, alongside it's name index, stores the
+// name of the symbol as a string and only uses that for comparison instead of
+// the name index which could differ depending on which section the symbol is
+// defined in (e.g. .strtab or .dynstr).
+struct NamedELFSymbol : ELFSymbol {
+  lldb_private::ConstString st_name_string; ///< Actual name of the ELF symbol
+
+  NamedELFSymbol();
+
+  NamedELFSymbol(const ELFSymbol & sym);
+
+  bool operator==(const NamedELFSymbol &rhs) const;
 };
 
 /// \class ELFDynamic
@@ -391,4 +415,33 @@
 
 } // End namespace elf.
 
+namespace std {
+
+// Define specializations of the std::hash struct for ELFSymbol and
+// NamedELFSymbol so they can be used in an std::unordered_set
+
+template <> struct hash<elf::ELFSymbol> {
+  std::size_t operator()(const elf::ELFSymbol &s) const noexcept {
+    std::size_t h1 = std::hash<elf::elf_addr>()(s.st_value);
+    std::size_t h2 = std::hash<elf::elf_xword>()(s.st_size);
+    std::size_t h3 = std::hash<elf::elf_word>()(s.st_name);
+    std::size_t h4 = std::hash<unsigned char>()(s.st_info);
+    std::size_t h5 = std::hash<unsigned char>()(s.st_other);
+    std::size_t h6 = std::hash<elf::elf_half>()(s.st_shndx);
+    return llvm::hash_combine(h1, h2, h3, h4, h5, h6);
+  }
+};
+
+template <> struct hash<elf::NamedELFSymbol> {
+  std::size_t operator()(const elf::NamedELFSymbol &s) const noexcept {
+    elf::ELFSymbol tmp = s;
+    // ignore the name when hashing the ELFSymbol
+    tmp.st_name = 0;
+    std::size_t h1 = std::hash<elf::ELFSymbol>()(tmp);
+    std::size_t h2 = std::hash<const char *>()(s.st_name_string.AsCString());
+    return llvm::hash_combine(h1, h2);
+  }
+};
+} // namespace std
+
 #endif // #ifndef liblldb_ELFHeader_h_
Index: lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp
===================================================================
--- lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp
+++ lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp
@@ -250,6 +250,11 @@
 
 ELFSymbol::ELFSymbol() { memset(this, 0, sizeof(ELFSymbol)); }
 
+ELFSymbol::ELFSymbol(const ELFSymbol &other)
+    : st_value(other.st_value), st_size(other.st_size), st_name(other.st_name),
+      st_info(other.st_info), st_other(other.st_other),
+      st_shndx(other.st_shndx) {}
+
 #define ENUM_TO_CSTR(e)                                                        \
   case e:                                                                      \
     return #e
@@ -353,6 +358,21 @@
   return true;
 }
 
+// NamedELFSymbol
+
+NamedELFSymbol::NamedELFSymbol() : ELFSymbol() {}
+
+NamedELFSymbol::NamedELFSymbol(const ELFSymbol &other) : ELFSymbol(other) {}
+
+bool NamedELFSymbol::operator==(const NamedELFSymbol &rhs) const {
+  NamedELFSymbol r = rhs;
+  // Always ignore the name index when comparing the symbols
+  // and instead compare the symbol name itself.
+  r.st_name = st_name;
+  return elf::ELFSymbol::operator==(r) &&
+         st_name_string == rhs.st_name_string;
+}
+
 // ELFProgramHeader
 
 ELFProgramHeader::ELFProgramHeader() {
Index: lldb/lit/helper/toolchain.py
===================================================================
--- lldb/lit/helper/toolchain.py
+++ lldb/lit/helper/toolchain.py
@@ -126,6 +126,6 @@
 
     support_tools = ['yaml2obj', 'obj2yaml', 'llvm-pdbutil',
                      'llvm-mc', 'llvm-readobj', 'llvm-objdump',
-                     'llvm-objcopy', 'lli']
+                     'llvm-objcopy', 'lli', 'llvm-strip']
     additional_tool_dirs += [config.lldb_tools_dir, config.llvm_tools_dir]
     llvm_config.add_tool_substitutions(support_tools, additional_tool_dirs)
Index: lldb/lit/Modules/ELF/load-symtab-and-dynsym.test
===================================================================
--- /dev/null
+++ lldb/lit/Modules/ELF/load-symtab-and-dynsym.test
@@ -0,0 +1,48 @@
+# REQUIRES: system-linux
+
+# This test ensures that we will load .dynsym even if there's a .symtab section.
+# We do this by compiling a small C program with two functions and we direct the
+# linker where to put the symbols so that in the end the layout is as follows:
+#
+#   Symbol table '.dynsym' contains 4 entries:
+#   Num:    Value          Size Type    Bind   Vis      Ndx Name
+#     0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND 
+#     1: 0000000000000000     0 FUNC    GLOBAL DEFAULT  UND __libc_start_main@GLIBC_2.2.5
+#     2: 0000000000000000     0 NOTYPE  WEAK   DEFAULT  UND __gmon_start__
+#     3: 0000000000401120    13 FUNC    GLOBAL DEFAULT   10 functionInDynsym
+#   
+#   Symbol table '.symtab' contains 2 entries:
+#   Num:    Value          Size Type    Bind   Vis      Ndx Name
+#     0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND 
+#     1: 0000000000401110    15 FUNC    GLOBAL DEFAULT   10 functionInSymtab
+
+# We want to keep the symbol "functionInDynsym" in the .dynsym section and not
+# have it put the default .symtab section.
+# RUN: echo "{functionInDynsym;};" > %T/dynmic-symbols.txt
+# RUN: %clang -Wl,--dynamic-list=%T/dynmic-symbols.txt -g -o %t.binary %p/Inputs/load-symtab-and-dynsym.c
+
+# Remove not needed symbols
+# RUN: echo "functionInSymtab" > %t.keep_symbols
+# RUN: echo "functionInDynsym" >> %t.keep_symbols
+# RUN: llvm-objcopy --strip-all --remove-section .gdb_index --remove-section .comment --keep-symbols=%t.keep_symbols %t.binary
+
+# Remove functionInDynsym symbol from .symtab (will leave symbol in .dynsym intact)
+# RUN: llvm-strip --strip-symbol=functionInDynsym %t.binary
+
+# RUN: %lldb -b -o 'b functionInSymtab' -o 'b functionInDynsym' -o 'run' -o 'continue' %t.binary | FileCheck %s
+
+# CHECK: (lldb) b functionInSymtab
+# CHECK-NEXT: Breakpoint 1: where = {{.*}}.binary`functionInSymtab, address = 0x{{.*}}
+
+# CHECK: (lldb) b functionInDynsym
+# CHECK-NEXT: Breakpoint 2: where = {{.*}}.binary`functionInDynsym, address = 0x{{.*}}
+
+# CHECK: (lldb) run
+# CHECK-NEXT: Process {{.*}} stopped
+# CHECK-NEXT: * thread #1, name = 'load-symtab-and', stop reason = breakpoint 1.1
+
+# CHECK: (lldb) continue
+# CHECK-NEXT: Process {{.*}} resuming
+# CHECK-NEXT: Process {{.*}} stopped
+# CHECK-NEXT: * thread #1, name = 'load-symtab-and', stop reason = breakpoint 2.1
+
Index: lldb/lit/Modules/ELF/load-from-dynsym-alone.test
===================================================================
--- /dev/null
+++ lldb/lit/Modules/ELF/load-from-dynsym-alone.test
@@ -0,0 +1,33 @@
+# REQUIRES: system-linux
+
+# This test ensures that we will load .dynsym even if there's no .symtab section.
+# We do this by compiling a small C program with a function and we direct the
+# linker where to put the symbols so that in the end the layout is as follows:
+#
+#   Symbol table '.dynsym' contains 4 entries:
+#      Num:    Value          Size Type    Bind   Vis      Ndx Name
+#        0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND 
+#        1: 0000000000000000     0 FUNC    GLOBAL DEFAULT  UND __libc_start_main@GLIBC_2.2.5
+#        2: 0000000000000000     0 NOTYPE  WEAK   DEFAULT  UND __gmon_start__
+#        3: 0000000000401110    13 FUNC    GLOBAL DEFAULT   10 functionInDynsym
+
+# We want to keep the symbol "functionInDynsym" in the .dynsym section and not
+# have it put the default .symtab section.
+# RUN: echo "{functionInDynsym;};" > %T/dynmic-symbols.txt
+# RUN: %clang -Wl,--dynamic-list=%T/dynmic-symbols.txt -g -o %t.binary %p/Inputs/load-from-dynsym-alone.c
+
+# Remove not needed symbols
+# RUN: echo "functionInDynsym" > %t.keep_symbols
+# RUN: llvm-objcopy --strip-all --remove-section .gdb_index --remove-section .comment --keep-symbols=%t.keep_symbols %t.binary
+
+# Remove functionInDynsym symbol from .symtab (will leave symbol in .dynsym intact)
+# RUN: llvm-strip --strip-symbol=functionInDynsym %t.binary
+
+# RUN: %lldb -b -o 'b functionInDynsym' -o 'run' -o 'continue' %t.binary | FileCheck %s
+
+# CHECK: (lldb) b functionInDynsym
+# CHECK-NEXT: Breakpoint 1: where = {{.*}}.binary`functionInDynsym, address = 0x{{.*}}
+
+# CHECK: (lldb) run
+# CHECK-NEXT: Process {{.*}} stopped
+# CHECK-NEXT: * thread #1, name = 'load-from-dynsy', stop reason = breakpoint 1.1
Index: lldb/lit/Modules/ELF/Inputs/load-symtab-and-dynsym.c
===================================================================
--- /dev/null
+++ lldb/lit/Modules/ELF/Inputs/load-symtab-and-dynsym.c
@@ -0,0 +1,12 @@
+// This function will be embedded within the .symtab section of the
+// .gnu_debugdata section.
+int functionInSymtab(int num) { return num * 4; }
+
+// This function will be embedded within the .dynsym section of the main binary.
+int functionInDynsym(int num) { return num * 3; }
+
+int main(int argc, char *argv[]) {
+  int x = functionInSymtab(argc);
+  int y = functionInDynsym(x);
+  return y;
+}
Index: lldb/lit/Modules/ELF/Inputs/load-from-dynsym-alone.c
===================================================================
--- /dev/null
+++ lldb/lit/Modules/ELF/Inputs/load-from-dynsym-alone.c
@@ -0,0 +1,7 @@
+// This function will be embedded within the .dynsym section of the main binary.
+int functionInDynsym(int num) { return num * 3; }
+
+int main(int argc, char *argv[]) {
+  int y = functionInDynsym(argc);
+  return y;
+}
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to