clayborg created this revision.
clayborg added reviewers: labath, JDevlieghere, GeorgeHuyubo, yinghuitan, 
kusmour.
Herald added a project: All.
clayborg requested review of this revision.
Herald added a project: LLDB.
Herald added a subscriber: lldb-commits.

Our LLDB parser didn't correctly handle archives of all flavors on different 
systems, it currently only correctly handled BSD archives, normal and thin, on 
macOS, but I noticed that it was getting incorrect information when decoding a 
variety of archives on linux. There were subtle changes to how names were 
encoded that we didn't handle correctly and we also didn't set the result of 
GetObjectSize() correctly as there was some bad math. This didn't matter when 
exracting .o files from .a files for LLDB because the size was always way too 
big, but it was big enough to at least read enough bytes for each object within 
the archive.

This patch does the following:

- switch over to use LLVM's archive parser and avoids previous code duplication
- remove values from ObjectContainerBSDArchive::Object that we don't use like:
  - uid
  - gid
  - mode
- fix ths ObjectContainerBSDArchive::Object::file_size value to be correct
- adds tests to test that we get the correct module specifications


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D159408

Files:
  lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp
  lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h
  lldb/test/API/functionalities/archives/TestBSDArchives.py
  lldb/test/API/functionalities/archives/b.c
  lldb/test/API/functionalities/archives/c.c

Index: lldb/test/API/functionalities/archives/c.c
===================================================================
--- lldb/test/API/functionalities/archives/c.c
+++ lldb/test/API/functionalities/archives/c.c
@@ -1,5 +1,6 @@
 static int __c_global = 3;
-
+char __extra[4096]; // Make sure sizeof b.o differs from a.o and c.o
+char __extra2[4096]; // Make sure sizeof b.o differs from a.o and c.o
 int c(int arg) {
   int result = arg + __c_global;
   return result;
Index: lldb/test/API/functionalities/archives/b.c
===================================================================
--- lldb/test/API/functionalities/archives/b.c
+++ lldb/test/API/functionalities/archives/b.c
@@ -1,4 +1,5 @@
 static int __b_global = 2;
+char __extra[4096]; // Make sure sizeof b.o differs from a.o and c.o
 
 int b(int arg) {
     int result = arg + __b_global;
Index: lldb/test/API/functionalities/archives/TestBSDArchives.py
===================================================================
--- lldb/test/API/functionalities/archives/TestBSDArchives.py
+++ lldb/test/API/functionalities/archives/TestBSDArchives.py
@@ -70,12 +70,6 @@
         )
         self.expect_var_path("__b_global", type="int", value="2")
 
-        # Test loading thin archives
-        archive_path = self.getBuildArtifact("libbar.a")
-        module_specs = lldb.SBModuleSpecList.GetModuleSpecifications(archive_path)
-        num_specs = module_specs.GetSize()
-        self.assertEqual(num_specs, 1)
-        self.assertEqual(module_specs.GetSpecAtIndex(0).GetObjectName(), "c.o")
 
     def check_frame_variable_errors(self, thread, error_strings):
         command_result = lldb.SBCommandReturnObject()
@@ -129,6 +123,53 @@
         ]
         self.check_frame_variable_errors(thread, error_strings)
 
+    @skipIfRemote
+    def test_archive_specifications(self):
+        """
+        Create archives and make sure the information we get when retrieving
+        the modules specifications is correct.
+        """
+        self.build()
+        libbar_path = self.getBuildArtifact("libbar.a")
+        libfoo_path = self.getBuildArtifact("libfoo.a")
+        libfoothin_path = self.getBuildArtifact("libfoo-thin.a")
+        objfile_a = self.getBuildArtifact("a.o")
+        objfile_b = self.getBuildArtifact("b.o")
+        objfile_c = self.getBuildArtifact("c.o")
+        size_a = os.path.getsize(objfile_a)
+        size_b = os.path.getsize(objfile_b)
+        size_c = os.path.getsize(objfile_c)
+
+        # Test loading normal archives
+        module_specs = lldb.SBModuleSpecList.GetModuleSpecifications(libfoo_path)
+        num_specs = module_specs.GetSize()
+        self.assertEqual(num_specs, 2)
+        spec = module_specs.GetSpecAtIndex(0)
+        self.assertEqual(spec.GetObjectName(), "a.o")
+        self.assertEqual(spec.GetObjectSize(), size_a)
+        spec = module_specs.GetSpecAtIndex(1)
+        self.assertEqual(spec.GetObjectName(), "b.o")
+        self.assertEqual(spec.GetObjectSize(), size_b)
+
+        # Test loading thin archives
+        module_specs = lldb.SBModuleSpecList.GetModuleSpecifications(libbar_path)
+        num_specs = module_specs.GetSize()
+        self.assertEqual(num_specs, 1)
+        spec = module_specs.GetSpecAtIndex(0)
+        self.assertEqual(spec.GetObjectName(), "c.o")
+        self.assertEqual(spec.GetObjectSize(), size_c)
+
+        module_specs = lldb.SBModuleSpecList.GetModuleSpecifications(libfoothin_path)
+        num_specs = module_specs.GetSize()
+        self.assertEqual(num_specs, 2)
+        spec = module_specs.GetSpecAtIndex(0)
+        self.assertEqual(spec.GetObjectName(), "a.o")
+        self.assertEqual(spec.GetObjectSize(), size_a)
+        spec = module_specs.GetSpecAtIndex(1)
+        self.assertEqual(spec.GetObjectName(), "b.o")
+        self.assertEqual(spec.GetObjectSize(), size_b, libfoothin_path)
+
+
     @skipIfRemote
     @skipUnlessDarwin
     def test_frame_var_errors_when_thin_archive_malformed(self):
@@ -142,6 +183,8 @@
         libfoo_path = self.getBuildArtifact("libfoo.a")
         libthin_path = self.getBuildArtifact("libfoo-thin.a")
         objfile_a = self.getBuildArtifact("a.o")
+        objfile_b = self.getBuildArtifact("b.o")
+        objfile_c = self.getBuildArtifact("c.o")
         # Replace the libfoo.a file with a thin archive containing the same
         # debug information (a.o, b.o). Then remove a.o from the file system
         # so we force an error when we set a breakpoint on a() function.
Index: lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h
===================================================================
--- lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h
+++ lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h
@@ -93,15 +93,6 @@
     /// Object modification time in the archive.
     uint32_t modification_time = 0;
 
-    /// Object user id in the archive.
-    uint16_t uid = 0;
-
-    /// Object group id in the archive.
-    uint16_t gid = 0;
-
-    /// Object octal file permissions in the archive.
-    uint16_t mode = 0;
-
     /// Object size in bytes in the archive.
     uint32_t size = 0;
 
@@ -110,6 +101,8 @@
 
     /// Length of the object data.
     lldb::offset_t file_size = 0;
+
+    void Dump() const;
   };
 
   class Archive {
Index: lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp
===================================================================
--- lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp
+++ lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp
@@ -10,7 +10,6 @@
 
 #if defined(_WIN32) || defined(__ANDROID__)
 // Defines from ar, missing on Windows
-#define ARMAG "!<arch>\n"
 #define SARMAG 8
 #define ARFMAG "`\n"
 
@@ -35,6 +34,7 @@
 #include "lldb/Utility/Stream.h"
 #include "lldb/Utility/Timer.h"
 
+#include "llvm/Object/Archive.h"
 #include "llvm/Support/MemoryBuffer.h"
 
 using namespace lldb;
@@ -49,158 +49,19 @@
 void ObjectContainerBSDArchive::Object::Clear() {
   ar_name.Clear();
   modification_time = 0;
-  uid = 0;
-  gid = 0;
-  mode = 0;
   size = 0;
   file_offset = 0;
   file_size = 0;
 }
 
-lldb::offset_t ObjectContainerBSDArchive::Object::ExtractFromThin(
-    const DataExtractor &data, lldb::offset_t offset,
-    llvm::StringRef stringTable) {
-  size_t ar_name_len = 0;
-  std::string str;
-  char *err;
-
-  // File header
-  //
-  // The common format is as follows.
-  //
-  //  Offset  Length	Name            Format
-  //  0       16      File name       ASCII right padded with spaces (no spaces
-  //  allowed in file name)
-  //  16      12      File mod        Decimal as cstring right padded with
-  //  spaces
-  //  28      6       Owner ID        Decimal as cstring right padded with
-  //  spaces
-  //  34      6       Group ID        Decimal as cstring right padded with
-  //  spaces
-  //  40      8       File mode       Octal   as cstring right padded with
-  //  spaces
-  //  48      10      File byte size  Decimal as cstring right padded with
-  //  spaces
-  //  58      2       File magic      0x60 0x0A
-
-  // Make sure there is enough data for the file header and bail if not
-  if (!data.ValidOffsetForDataOfSize(offset, 60))
-    return LLDB_INVALID_OFFSET;
-
-  str.assign((const char *)data.GetData(&offset, 16), 16);
-  if (!(llvm::StringRef(str).startswith("//") || stringTable.empty())) {
-    // Strip off any trailing spaces.
-    const size_t last_pos = str.find_last_not_of(' ');
-    if (last_pos != std::string::npos) {
-      if (last_pos + 1 < 16)
-        str.erase(last_pos + 1);
-    }
-    int start = strtoul(str.c_str() + 1, &err, 10);
-    int end = stringTable.find('\n', start);
-    str.assign(stringTable.data() + start, end - start - 1);
-    ar_name.SetCString(str.c_str());
-  }
-
-  str.assign((const char *)data.GetData(&offset, 12), 12);
-  modification_time = strtoul(str.c_str(), &err, 10);
-
-  str.assign((const char *)data.GetData(&offset, 6), 6);
-  uid = strtoul(str.c_str(), &err, 10);
-
-  str.assign((const char *)data.GetData(&offset, 6), 6);
-  gid = strtoul(str.c_str(), &err, 10);
-
-  str.assign((const char *)data.GetData(&offset, 8), 8);
-  mode = strtoul(str.c_str(), &err, 8);
-
-  str.assign((const char *)data.GetData(&offset, 10), 10);
-  size = strtoul(str.c_str(), &err, 10);
-
-  str.assign((const char *)data.GetData(&offset, 2), 2);
-  if (str == ARFMAG) {
-    file_offset = offset;
-    file_size = size - ar_name_len;
-    return offset;
-  }
-  return LLDB_INVALID_OFFSET;
-}
-
-lldb::offset_t
-ObjectContainerBSDArchive::Object::Extract(const DataExtractor &data,
-                                           lldb::offset_t offset) {
-  size_t ar_name_len = 0;
-  std::string str;
-  char *err;
-
-  // File header
-  //
-  // The common format is as follows.
-  //
-  //  Offset  Length	Name            Format
-  //  0       16      File name       ASCII right padded with spaces (no spaces
-  //  allowed in file name)
-  //  16      12      File mod        Decimal as cstring right padded with
-  //  spaces
-  //  28      6       Owner ID        Decimal as cstring right padded with
-  //  spaces
-  //  34      6       Group ID        Decimal as cstring right padded with
-  //  spaces
-  //  40      8       File mode       Octal   as cstring right padded with
-  //  spaces
-  //  48      10      File byte size  Decimal as cstring right padded with
-  //  spaces
-  //  58      2       File magic      0x60 0x0A
-
-  // Make sure there is enough data for the file header and bail if not
-  if (!data.ValidOffsetForDataOfSize(offset, 60))
-    return LLDB_INVALID_OFFSET;
-
-  str.assign((const char *)data.GetData(&offset, 16), 16);
-  if (llvm::StringRef(str).startswith("#1/")) {
-    // If the name is longer than 16 bytes, or contains an embedded space then
-    // it will use this format where the length of the name is here and the
-    // name characters are after this header.
-    ar_name_len = strtoul(str.c_str() + 3, &err, 10);
-  } else {
-    // Strip off any trailing spaces.
-    const size_t last_pos = str.find_last_not_of(' ');
-    if (last_pos != std::string::npos) {
-      if (last_pos + 1 < 16)
-        str.erase(last_pos + 1);
-    }
-    ar_name.SetCString(str.c_str());
-  }
-
-  str.assign((const char *)data.GetData(&offset, 12), 12);
-  modification_time = strtoul(str.c_str(), &err, 10);
-
-  str.assign((const char *)data.GetData(&offset, 6), 6);
-  uid = strtoul(str.c_str(), &err, 10);
-
-  str.assign((const char *)data.GetData(&offset, 6), 6);
-  gid = strtoul(str.c_str(), &err, 10);
-
-  str.assign((const char *)data.GetData(&offset, 8), 8);
-  mode = strtoul(str.c_str(), &err, 8);
-
-  str.assign((const char *)data.GetData(&offset, 10), 10);
-  size = strtoul(str.c_str(), &err, 10);
-
-  str.assign((const char *)data.GetData(&offset, 2), 2);
-  if (str == ARFMAG) {
-    if (ar_name_len > 0) {
-      const void *ar_name_ptr = data.GetData(&offset, ar_name_len);
-      // Make sure there was enough data for the string value and bail if not
-      if (ar_name_ptr == nullptr)
-        return LLDB_INVALID_OFFSET;
-      str.assign((const char *)ar_name_ptr, ar_name_len);
-      ar_name.SetCString(str.c_str());
-    }
-    file_offset = offset;
-    file_size = size - ar_name_len;
-    return offset;
-  }
-  return LLDB_INVALID_OFFSET;
+void ObjectContainerBSDArchive::Object::Dump() const {
+  printf("name        = \"%s\"\n", ar_name.GetCString());
+  printf("mtime       = 0x%8.8" PRIx32 "\n", modification_time);
+  printf("size        = 0x%8.8" PRIx32 " (%" PRIu32 ")\n", size, size);
+  printf("file_offset = 0x%16.16" PRIx64 " (%" PRIu64 ")\n", file_offset,
+         file_offset);
+  printf("file_size   = 0x%16.16" PRIx64 " (%" PRIu64 ")\n\n", file_size,
+         file_size);
 }
 
 ObjectContainerBSDArchive::Archive::Archive(const lldb_private::ArchSpec &arch,
@@ -215,68 +76,60 @@
 
 size_t ObjectContainerBSDArchive::Archive::ParseObjects() {
   DataExtractor &data = m_data;
-  std::string str;
-  lldb::offset_t offset = 0;
-  str.assign((const char *)data.GetData(&offset, SARMAG), SARMAG);
-  if (str == ARMAG) {
-    Object obj;
-    do {
-      offset = obj.Extract(data, offset);
-      if (offset == LLDB_INVALID_OFFSET)
-        break;
-      size_t obj_idx = m_objects.size();
-      m_objects.push_back(obj);
-      // Insert all of the C strings out of order for now...
-      m_object_name_to_index_map.Append(obj.ar_name, obj_idx);
-      offset += obj.file_size;
-      obj.Clear();
-    } while (data.ValidOffset(offset));
-
-    // Now sort all of the object name pointers
-    m_object_name_to_index_map.Sort();
-  } else if (str == ThinArchiveMagic) {
-    Object obj;
-    size_t obj_idx;
-
-    // Retrieve symbol table
-    offset = obj.ExtractFromThin(data, offset, "");
-    if (offset == LLDB_INVALID_OFFSET)
-      return m_objects.size();
-    obj_idx = m_objects.size();
-    m_objects.push_back(obj);
-    // Insert all of the C strings out of order for now...
-    m_object_name_to_index_map.Append(obj.ar_name, obj_idx);
-    offset += obj.file_size;
-    obj.Clear();
 
-    // Retrieve string table
-    offset = obj.ExtractFromThin(data, offset, "");
-    if (offset == LLDB_INVALID_OFFSET)
-      return m_objects.size();
-    obj_idx = m_objects.size();
+  std::unique_ptr<llvm::MemoryBuffer> mem_buffer =
+      llvm::MemoryBuffer::getMemBuffer(
+            llvm::StringRef((const char *)data.GetDataStart(),
+                            data.GetByteSize()),
+            llvm::StringRef(),
+            /*RequiresNullTerminator=*/false);
+
+  auto exp_ar = llvm::object::Archive::create(mem_buffer->getMemBufferRef());
+  if (!exp_ar) {
+    llvm::consumeError(exp_ar.takeError());
+    return 0;
+  }
+  auto llvm_archive = std::move(exp_ar.get());
+
+  llvm::Error iter_err = llvm::Error::success();
+  Object obj;
+  for (const auto &child: llvm_archive->children(iter_err)) {
+    auto exp_name = child.getName();
+    if (exp_name)
+      obj.ar_name = ConstString(exp_name.get());
+    else
+      llvm::consumeError(exp_name.takeError());
+
+    auto exp_mtime = child.getLastModified();
+    if (exp_mtime)
+      obj.modification_time =
+          std::chrono::duration_cast<std::chrono::seconds>(
+              std::chrono::time_point_cast<std::chrono::seconds>(
+                    exp_mtime.get()).time_since_epoch()).count();
+    else
+      llvm::consumeError(exp_mtime.takeError());
+
+    auto exp_size = child.getRawSize();
+    if (exp_size)
+      obj.size = exp_size.get();
+    else
+      llvm::consumeError(exp_size.takeError());
+
+    obj.file_offset = child.getDataOffset();
+
+    auto exp_file_size = child.getSize();
+    if (exp_file_size)
+      obj.file_size = exp_file_size.get();
+    else
+      llvm::consumeError(exp_file_size.takeError());
+    m_object_name_to_index_map.Append(obj.ar_name, m_objects.size());
     m_objects.push_back(obj);
-    // Insert all of the C strings out of order for now...
-    m_object_name_to_index_map.Append(obj.ar_name, obj_idx);
-    // Extract string table
-    llvm::StringRef strtab((const char *)data.GetData(&offset, obj.size),
-                           obj.size);
     obj.Clear();
-
-    // Retrieve object files
-    do {
-      offset = obj.ExtractFromThin(data, offset, strtab);
-      if (offset == LLDB_INVALID_OFFSET)
-        break;
-      obj_idx = m_objects.size();
-      m_objects.push_back(obj);
-      // Insert all of the C strings out of order for now...
-      m_object_name_to_index_map.Append(obj.ar_name, obj_idx);
-      obj.Clear();
-    } while (data.ValidOffset(offset));
-
-    // Now sort all of the object name pointers
-    m_object_name_to_index_map.Sort();
   }
+  if (iter_err)
+    llvm::consumeError(std::move(iter_err));
+  // Now sort all of the object name pointers
+  m_object_name_to_index_map.Sort();
   return m_objects.size();
 }
 
@@ -462,20 +315,21 @@
 ArchiveType
 ObjectContainerBSDArchive::MagicBytesMatch(const DataExtractor &data) {
   uint32_t offset = 0;
-  const char *armag = (const char *)data.PeekData(offset, sizeof(ar_hdr));
+  const char *armag = (const char *)data.PeekData(offset,
+                                                  sizeof(ar_hdr) + SARMAG);
   if (armag == nullptr)
     return ArchiveType::Invalid;
-  if (::strncmp(armag, ARMAG, SARMAG) == 0) {
-    armag += offsetof(struct ar_hdr, ar_fmag) + SARMAG;
-    if (strncmp(armag, ARFMAG, 2) == 0)
-      return ArchiveType::Archive;
-  } else if (::strncmp(armag, ThinArchiveMagic, strlen(ThinArchiveMagic)) ==
-             0) {
-    armag += offsetof(struct ar_hdr, ar_fmag) + strlen(ThinArchiveMagic);
-    if (strncmp(armag, ARFMAG, 2) == 0) {
-      return ArchiveType::ThinArchive;
-    }
-  }
+  ArchiveType result = ArchiveType::Invalid;
+  if (strncmp(armag, ArchiveMagic, SARMAG) == 0)
+      result = ArchiveType::Archive;
+  else if (strncmp(armag, ThinArchiveMagic, SARMAG) == 0)
+      result = ArchiveType::ThinArchive;
+  else
+      return ArchiveType::Invalid;
+
+  armag += offsetof(struct ar_hdr, ar_fmag) + SARMAG;
+  if (strncmp(armag, ARFMAG, 2) == 0)
+      return result;
   return ArchiveType::Invalid;
 }
 
@@ -623,7 +477,7 @@
                 std::chrono::seconds(object->modification_time));
             spec.GetObjectName() = object->ar_name;
             spec.SetObjectOffset(object_file_offset);
-            spec.SetObjectSize(file_size - object_file_offset);
+            spec.SetObjectSize(object->file_size);
             spec.GetObjectModificationTime() = object_mod_time;
           }
         }
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to