clayborg created this revision. clayborg added reviewers: labath, JDevlieghere, GeorgeHuyubo, yinghuitan, kusmour. Herald added a project: All. clayborg requested review of this revision. Herald added a project: LLDB. Herald added a subscriber: lldb-commits.
Our LLDB parser didn't correctly handle archives of all flavors on different systems, it currently only correctly handled BSD archives, normal and thin, on macOS, but I noticed that it was getting incorrect information when decoding a variety of archives on linux. There were subtle changes to how names were encoded that we didn't handle correctly and we also didn't set the result of GetObjectSize() correctly as there was some bad math. This didn't matter when exracting .o files from .a files for LLDB because the size was always way too big, but it was big enough to at least read enough bytes for each object within the archive. This patch does the following: - switch over to use LLVM's archive parser and avoids previous code duplication - remove values from ObjectContainerBSDArchive::Object that we don't use like: - uid - gid - mode - fix ths ObjectContainerBSDArchive::Object::file_size value to be correct - adds tests to test that we get the correct module specifications Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D159408 Files: lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h lldb/test/API/functionalities/archives/TestBSDArchives.py lldb/test/API/functionalities/archives/b.c lldb/test/API/functionalities/archives/c.c
Index: lldb/test/API/functionalities/archives/c.c =================================================================== --- lldb/test/API/functionalities/archives/c.c +++ lldb/test/API/functionalities/archives/c.c @@ -1,5 +1,6 @@ static int __c_global = 3; - +char __extra[4096]; // Make sure sizeof b.o differs from a.o and c.o +char __extra2[4096]; // Make sure sizeof b.o differs from a.o and c.o int c(int arg) { int result = arg + __c_global; return result; Index: lldb/test/API/functionalities/archives/b.c =================================================================== --- lldb/test/API/functionalities/archives/b.c +++ lldb/test/API/functionalities/archives/b.c @@ -1,4 +1,5 @@ static int __b_global = 2; +char __extra[4096]; // Make sure sizeof b.o differs from a.o and c.o int b(int arg) { int result = arg + __b_global; Index: lldb/test/API/functionalities/archives/TestBSDArchives.py =================================================================== --- lldb/test/API/functionalities/archives/TestBSDArchives.py +++ lldb/test/API/functionalities/archives/TestBSDArchives.py @@ -70,12 +70,6 @@ ) self.expect_var_path("__b_global", type="int", value="2") - # Test loading thin archives - archive_path = self.getBuildArtifact("libbar.a") - module_specs = lldb.SBModuleSpecList.GetModuleSpecifications(archive_path) - num_specs = module_specs.GetSize() - self.assertEqual(num_specs, 1) - self.assertEqual(module_specs.GetSpecAtIndex(0).GetObjectName(), "c.o") def check_frame_variable_errors(self, thread, error_strings): command_result = lldb.SBCommandReturnObject() @@ -129,6 +123,53 @@ ] self.check_frame_variable_errors(thread, error_strings) + @skipIfRemote + def test_archive_specifications(self): + """ + Create archives and make sure the information we get when retrieving + the modules specifications is correct. + """ + self.build() + libbar_path = self.getBuildArtifact("libbar.a") + libfoo_path = self.getBuildArtifact("libfoo.a") + libfoothin_path = self.getBuildArtifact("libfoo-thin.a") + objfile_a = self.getBuildArtifact("a.o") + objfile_b = self.getBuildArtifact("b.o") + objfile_c = self.getBuildArtifact("c.o") + size_a = os.path.getsize(objfile_a) + size_b = os.path.getsize(objfile_b) + size_c = os.path.getsize(objfile_c) + + # Test loading normal archives + module_specs = lldb.SBModuleSpecList.GetModuleSpecifications(libfoo_path) + num_specs = module_specs.GetSize() + self.assertEqual(num_specs, 2) + spec = module_specs.GetSpecAtIndex(0) + self.assertEqual(spec.GetObjectName(), "a.o") + self.assertEqual(spec.GetObjectSize(), size_a) + spec = module_specs.GetSpecAtIndex(1) + self.assertEqual(spec.GetObjectName(), "b.o") + self.assertEqual(spec.GetObjectSize(), size_b) + + # Test loading thin archives + module_specs = lldb.SBModuleSpecList.GetModuleSpecifications(libbar_path) + num_specs = module_specs.GetSize() + self.assertEqual(num_specs, 1) + spec = module_specs.GetSpecAtIndex(0) + self.assertEqual(spec.GetObjectName(), "c.o") + self.assertEqual(spec.GetObjectSize(), size_c) + + module_specs = lldb.SBModuleSpecList.GetModuleSpecifications(libfoothin_path) + num_specs = module_specs.GetSize() + self.assertEqual(num_specs, 2) + spec = module_specs.GetSpecAtIndex(0) + self.assertEqual(spec.GetObjectName(), "a.o") + self.assertEqual(spec.GetObjectSize(), size_a) + spec = module_specs.GetSpecAtIndex(1) + self.assertEqual(spec.GetObjectName(), "b.o") + self.assertEqual(spec.GetObjectSize(), size_b, libfoothin_path) + + @skipIfRemote @skipUnlessDarwin def test_frame_var_errors_when_thin_archive_malformed(self): @@ -142,6 +183,8 @@ libfoo_path = self.getBuildArtifact("libfoo.a") libthin_path = self.getBuildArtifact("libfoo-thin.a") objfile_a = self.getBuildArtifact("a.o") + objfile_b = self.getBuildArtifact("b.o") + objfile_c = self.getBuildArtifact("c.o") # Replace the libfoo.a file with a thin archive containing the same # debug information (a.o, b.o). Then remove a.o from the file system # so we force an error when we set a breakpoint on a() function. Index: lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h =================================================================== --- lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h +++ lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h @@ -93,15 +93,6 @@ /// Object modification time in the archive. uint32_t modification_time = 0; - /// Object user id in the archive. - uint16_t uid = 0; - - /// Object group id in the archive. - uint16_t gid = 0; - - /// Object octal file permissions in the archive. - uint16_t mode = 0; - /// Object size in bytes in the archive. uint32_t size = 0; @@ -110,6 +101,8 @@ /// Length of the object data. lldb::offset_t file_size = 0; + + void Dump() const; }; class Archive { Index: lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp =================================================================== --- lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp +++ lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp @@ -10,7 +10,6 @@ #if defined(_WIN32) || defined(__ANDROID__) // Defines from ar, missing on Windows -#define ARMAG "!<arch>\n" #define SARMAG 8 #define ARFMAG "`\n" @@ -35,6 +34,7 @@ #include "lldb/Utility/Stream.h" #include "lldb/Utility/Timer.h" +#include "llvm/Object/Archive.h" #include "llvm/Support/MemoryBuffer.h" using namespace lldb; @@ -49,158 +49,19 @@ void ObjectContainerBSDArchive::Object::Clear() { ar_name.Clear(); modification_time = 0; - uid = 0; - gid = 0; - mode = 0; size = 0; file_offset = 0; file_size = 0; } -lldb::offset_t ObjectContainerBSDArchive::Object::ExtractFromThin( - const DataExtractor &data, lldb::offset_t offset, - llvm::StringRef stringTable) { - size_t ar_name_len = 0; - std::string str; - char *err; - - // File header - // - // The common format is as follows. - // - // Offset Length Name Format - // 0 16 File name ASCII right padded with spaces (no spaces - // allowed in file name) - // 16 12 File mod Decimal as cstring right padded with - // spaces - // 28 6 Owner ID Decimal as cstring right padded with - // spaces - // 34 6 Group ID Decimal as cstring right padded with - // spaces - // 40 8 File mode Octal as cstring right padded with - // spaces - // 48 10 File byte size Decimal as cstring right padded with - // spaces - // 58 2 File magic 0x60 0x0A - - // Make sure there is enough data for the file header and bail if not - if (!data.ValidOffsetForDataOfSize(offset, 60)) - return LLDB_INVALID_OFFSET; - - str.assign((const char *)data.GetData(&offset, 16), 16); - if (!(llvm::StringRef(str).startswith("//") || stringTable.empty())) { - // Strip off any trailing spaces. - const size_t last_pos = str.find_last_not_of(' '); - if (last_pos != std::string::npos) { - if (last_pos + 1 < 16) - str.erase(last_pos + 1); - } - int start = strtoul(str.c_str() + 1, &err, 10); - int end = stringTable.find('\n', start); - str.assign(stringTable.data() + start, end - start - 1); - ar_name.SetCString(str.c_str()); - } - - str.assign((const char *)data.GetData(&offset, 12), 12); - modification_time = strtoul(str.c_str(), &err, 10); - - str.assign((const char *)data.GetData(&offset, 6), 6); - uid = strtoul(str.c_str(), &err, 10); - - str.assign((const char *)data.GetData(&offset, 6), 6); - gid = strtoul(str.c_str(), &err, 10); - - str.assign((const char *)data.GetData(&offset, 8), 8); - mode = strtoul(str.c_str(), &err, 8); - - str.assign((const char *)data.GetData(&offset, 10), 10); - size = strtoul(str.c_str(), &err, 10); - - str.assign((const char *)data.GetData(&offset, 2), 2); - if (str == ARFMAG) { - file_offset = offset; - file_size = size - ar_name_len; - return offset; - } - return LLDB_INVALID_OFFSET; -} - -lldb::offset_t -ObjectContainerBSDArchive::Object::Extract(const DataExtractor &data, - lldb::offset_t offset) { - size_t ar_name_len = 0; - std::string str; - char *err; - - // File header - // - // The common format is as follows. - // - // Offset Length Name Format - // 0 16 File name ASCII right padded with spaces (no spaces - // allowed in file name) - // 16 12 File mod Decimal as cstring right padded with - // spaces - // 28 6 Owner ID Decimal as cstring right padded with - // spaces - // 34 6 Group ID Decimal as cstring right padded with - // spaces - // 40 8 File mode Octal as cstring right padded with - // spaces - // 48 10 File byte size Decimal as cstring right padded with - // spaces - // 58 2 File magic 0x60 0x0A - - // Make sure there is enough data for the file header and bail if not - if (!data.ValidOffsetForDataOfSize(offset, 60)) - return LLDB_INVALID_OFFSET; - - str.assign((const char *)data.GetData(&offset, 16), 16); - if (llvm::StringRef(str).startswith("#1/")) { - // If the name is longer than 16 bytes, or contains an embedded space then - // it will use this format where the length of the name is here and the - // name characters are after this header. - ar_name_len = strtoul(str.c_str() + 3, &err, 10); - } else { - // Strip off any trailing spaces. - const size_t last_pos = str.find_last_not_of(' '); - if (last_pos != std::string::npos) { - if (last_pos + 1 < 16) - str.erase(last_pos + 1); - } - ar_name.SetCString(str.c_str()); - } - - str.assign((const char *)data.GetData(&offset, 12), 12); - modification_time = strtoul(str.c_str(), &err, 10); - - str.assign((const char *)data.GetData(&offset, 6), 6); - uid = strtoul(str.c_str(), &err, 10); - - str.assign((const char *)data.GetData(&offset, 6), 6); - gid = strtoul(str.c_str(), &err, 10); - - str.assign((const char *)data.GetData(&offset, 8), 8); - mode = strtoul(str.c_str(), &err, 8); - - str.assign((const char *)data.GetData(&offset, 10), 10); - size = strtoul(str.c_str(), &err, 10); - - str.assign((const char *)data.GetData(&offset, 2), 2); - if (str == ARFMAG) { - if (ar_name_len > 0) { - const void *ar_name_ptr = data.GetData(&offset, ar_name_len); - // Make sure there was enough data for the string value and bail if not - if (ar_name_ptr == nullptr) - return LLDB_INVALID_OFFSET; - str.assign((const char *)ar_name_ptr, ar_name_len); - ar_name.SetCString(str.c_str()); - } - file_offset = offset; - file_size = size - ar_name_len; - return offset; - } - return LLDB_INVALID_OFFSET; +void ObjectContainerBSDArchive::Object::Dump() const { + printf("name = \"%s\"\n", ar_name.GetCString()); + printf("mtime = 0x%8.8" PRIx32 "\n", modification_time); + printf("size = 0x%8.8" PRIx32 " (%" PRIu32 ")\n", size, size); + printf("file_offset = 0x%16.16" PRIx64 " (%" PRIu64 ")\n", file_offset, + file_offset); + printf("file_size = 0x%16.16" PRIx64 " (%" PRIu64 ")\n\n", file_size, + file_size); } ObjectContainerBSDArchive::Archive::Archive(const lldb_private::ArchSpec &arch, @@ -215,68 +76,60 @@ size_t ObjectContainerBSDArchive::Archive::ParseObjects() { DataExtractor &data = m_data; - std::string str; - lldb::offset_t offset = 0; - str.assign((const char *)data.GetData(&offset, SARMAG), SARMAG); - if (str == ARMAG) { - Object obj; - do { - offset = obj.Extract(data, offset); - if (offset == LLDB_INVALID_OFFSET) - break; - size_t obj_idx = m_objects.size(); - m_objects.push_back(obj); - // Insert all of the C strings out of order for now... - m_object_name_to_index_map.Append(obj.ar_name, obj_idx); - offset += obj.file_size; - obj.Clear(); - } while (data.ValidOffset(offset)); - - // Now sort all of the object name pointers - m_object_name_to_index_map.Sort(); - } else if (str == ThinArchiveMagic) { - Object obj; - size_t obj_idx; - - // Retrieve symbol table - offset = obj.ExtractFromThin(data, offset, ""); - if (offset == LLDB_INVALID_OFFSET) - return m_objects.size(); - obj_idx = m_objects.size(); - m_objects.push_back(obj); - // Insert all of the C strings out of order for now... - m_object_name_to_index_map.Append(obj.ar_name, obj_idx); - offset += obj.file_size; - obj.Clear(); - // Retrieve string table - offset = obj.ExtractFromThin(data, offset, ""); - if (offset == LLDB_INVALID_OFFSET) - return m_objects.size(); - obj_idx = m_objects.size(); + std::unique_ptr<llvm::MemoryBuffer> mem_buffer = + llvm::MemoryBuffer::getMemBuffer( + llvm::StringRef((const char *)data.GetDataStart(), + data.GetByteSize()), + llvm::StringRef(), + /*RequiresNullTerminator=*/false); + + auto exp_ar = llvm::object::Archive::create(mem_buffer->getMemBufferRef()); + if (!exp_ar) { + llvm::consumeError(exp_ar.takeError()); + return 0; + } + auto llvm_archive = std::move(exp_ar.get()); + + llvm::Error iter_err = llvm::Error::success(); + Object obj; + for (const auto &child: llvm_archive->children(iter_err)) { + auto exp_name = child.getName(); + if (exp_name) + obj.ar_name = ConstString(exp_name.get()); + else + llvm::consumeError(exp_name.takeError()); + + auto exp_mtime = child.getLastModified(); + if (exp_mtime) + obj.modification_time = + std::chrono::duration_cast<std::chrono::seconds>( + std::chrono::time_point_cast<std::chrono::seconds>( + exp_mtime.get()).time_since_epoch()).count(); + else + llvm::consumeError(exp_mtime.takeError()); + + auto exp_size = child.getRawSize(); + if (exp_size) + obj.size = exp_size.get(); + else + llvm::consumeError(exp_size.takeError()); + + obj.file_offset = child.getDataOffset(); + + auto exp_file_size = child.getSize(); + if (exp_file_size) + obj.file_size = exp_file_size.get(); + else + llvm::consumeError(exp_file_size.takeError()); + m_object_name_to_index_map.Append(obj.ar_name, m_objects.size()); m_objects.push_back(obj); - // Insert all of the C strings out of order for now... - m_object_name_to_index_map.Append(obj.ar_name, obj_idx); - // Extract string table - llvm::StringRef strtab((const char *)data.GetData(&offset, obj.size), - obj.size); obj.Clear(); - - // Retrieve object files - do { - offset = obj.ExtractFromThin(data, offset, strtab); - if (offset == LLDB_INVALID_OFFSET) - break; - obj_idx = m_objects.size(); - m_objects.push_back(obj); - // Insert all of the C strings out of order for now... - m_object_name_to_index_map.Append(obj.ar_name, obj_idx); - obj.Clear(); - } while (data.ValidOffset(offset)); - - // Now sort all of the object name pointers - m_object_name_to_index_map.Sort(); } + if (iter_err) + llvm::consumeError(std::move(iter_err)); + // Now sort all of the object name pointers + m_object_name_to_index_map.Sort(); return m_objects.size(); } @@ -462,20 +315,21 @@ ArchiveType ObjectContainerBSDArchive::MagicBytesMatch(const DataExtractor &data) { uint32_t offset = 0; - const char *armag = (const char *)data.PeekData(offset, sizeof(ar_hdr)); + const char *armag = (const char *)data.PeekData(offset, + sizeof(ar_hdr) + SARMAG); if (armag == nullptr) return ArchiveType::Invalid; - if (::strncmp(armag, ARMAG, SARMAG) == 0) { - armag += offsetof(struct ar_hdr, ar_fmag) + SARMAG; - if (strncmp(armag, ARFMAG, 2) == 0) - return ArchiveType::Archive; - } else if (::strncmp(armag, ThinArchiveMagic, strlen(ThinArchiveMagic)) == - 0) { - armag += offsetof(struct ar_hdr, ar_fmag) + strlen(ThinArchiveMagic); - if (strncmp(armag, ARFMAG, 2) == 0) { - return ArchiveType::ThinArchive; - } - } + ArchiveType result = ArchiveType::Invalid; + if (strncmp(armag, ArchiveMagic, SARMAG) == 0) + result = ArchiveType::Archive; + else if (strncmp(armag, ThinArchiveMagic, SARMAG) == 0) + result = ArchiveType::ThinArchive; + else + return ArchiveType::Invalid; + + armag += offsetof(struct ar_hdr, ar_fmag) + SARMAG; + if (strncmp(armag, ARFMAG, 2) == 0) + return result; return ArchiveType::Invalid; } @@ -623,7 +477,7 @@ std::chrono::seconds(object->modification_time)); spec.GetObjectName() = object->ar_name; spec.SetObjectOffset(object_file_offset); - spec.SetObjectSize(file_size - object_file_offset); + spec.SetObjectSize(object->file_size); spec.GetObjectModificationTime() = object_mod_time; } }
_______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits