lemo created this revision.
lemo added reviewers: amccarth, labath.
lemo added a project: LLDB.
Herald added a subscriber: mgrang.

Corrupted minidumps was leading to unpredictable behavior.

This change adds explicit consistency checks for the minidump early on. The 
checks are not comprehensive but they should catch obvious structural 
violations:

- streams with type == 0
- duplicate streams (same type)
- overlapping streams
- truncated minidumps

Another early check is to make sure we actually support the minidump 
architecture instead of crashing at a random place deep inside LLDB.


https://reviews.llvm.org/D49202

Files:
  source/Plugins/Process/minidump/MinidumpParser.cpp
  source/Plugins/Process/minidump/MinidumpParser.h
  source/Plugins/Process/minidump/MinidumpTypes.cpp
  source/Plugins/Process/minidump/ProcessMinidump.cpp
  unittests/Process/minidump/MinidumpParserTest.cpp

Index: unittests/Process/minidump/MinidumpParserTest.cpp
===================================================================
--- unittests/Process/minidump/MinidumpParserTest.cpp
+++ unittests/Process/minidump/MinidumpParserTest.cpp
@@ -39,15 +39,21 @@
 class MinidumpParserTest : public testing::Test {
 public:
   void SetUpData(const char *minidump_filename,
-                 uint64_t load_size = UINT64_MAX) {
+                 uint64_t load_size = UINT64_MAX,
+                 bool expect_failure = false) {
     std::string filename = GetInputFilePath(minidump_filename);
     auto BufferPtr = DataBufferLLVM::CreateSliceFromPath(filename, load_size, 0);
 
     llvm::Optional<MinidumpParser> optional_parser =
         MinidumpParser::Create(BufferPtr);
     ASSERT_TRUE(optional_parser.hasValue());
     parser.reset(new MinidumpParser(optional_parser.getValue()));
     ASSERT_GT(parser->GetData().size(), 0UL);
+    auto result = parser->Initialize();
+    if (expect_failure)
+      ASSERT_TRUE(result.Fail());
+    else
+      ASSERT_TRUE(result.Success()) << result.AsCString();
   }
 
   std::unique_ptr<MinidumpParser> parser;
@@ -68,12 +74,10 @@
   EXPECT_EQ(1232UL, context.size());
 }
 
-TEST_F(MinidumpParserTest, GetThreadsTruncatedFile) {
-  SetUpData("linux-x86_64.dmp", 200);
-  llvm::ArrayRef<MinidumpThread> thread_list;
-
-  thread_list = parser->GetThreads();
-  ASSERT_EQ(0UL, thread_list.size());
+TEST_F(MinidumpParserTest, TruncatedMinidumps) {
+  SetUpData("linux-x86_64.dmp", 32, true);
+  SetUpData("linux-x86_64.dmp", 100, true);
+  SetUpData("linux-x86_64.dmp", 20 * 1024, true);
 }
 
 TEST_F(MinidumpParserTest, GetArchitecture) {
Index: source/Plugins/Process/minidump/ProcessMinidump.cpp
===================================================================
--- source/Plugins/Process/minidump/ProcessMinidump.cpp
+++ source/Plugins/Process/minidump/ProcessMinidump.cpp
@@ -105,7 +105,7 @@
   if (!DataPtr)
     return nullptr;
 
-  assert(DataPtr->GetByteSize() == header_size);
+  lldbassert(DataPtr->GetByteSize() == header_size);
 
   // first, only try to parse the header, beacuse we need to be fast
   llvm::ArrayRef<uint8_t> HeaderBytes = DataPtr->GetData();
@@ -164,14 +164,33 @@
 Status ProcessMinidump::DoLoadCore() {
   Status error;
 
+  // Minidump parser initialization & consistency checks
+  error = m_minidump_parser.Initialize();
+  if (error.Fail())
+    return error;
+
+  // Do we support the minidump's architecture?
+  ArchSpec arch = GetArchitecture();
+  switch (arch.GetMachine()) {
+  case llvm::Triple::x86:
+  case llvm::Triple::x86_64:
+    // supported
+    break;
+
+  default:
+    error.SetErrorStringWithFormat("Unsupported minidump architecture: %s",
+                                   arch.GetArchitectureName());
+    return error;
+  }
+
   m_thread_list = m_minidump_parser.GetThreads();
   m_active_exception = m_minidump_parser.GetExceptionStream();
   ReadModuleList();
-  GetTarget().SetArchitecture(GetArchitecture());
+  GetTarget().SetArchitecture(arch);
 
   llvm::Optional<lldb::pid_t> pid = m_minidump_parser.GetPid();
   if (!pid) {
-    error.SetErrorString("failed to parse PID");
+    error.SetErrorString("Failed to parse PID");
     return error;
   }
   SetID(pid.getValue());
Index: source/Plugins/Process/minidump/MinidumpTypes.cpp
===================================================================
--- source/Plugins/Process/minidump/MinidumpTypes.cpp
+++ source/Plugins/Process/minidump/MinidumpTypes.cpp
@@ -33,9 +33,6 @@
       version != MinidumpHeaderConstants::Version)
     return nullptr;
 
-  // TODO check for max number of streams ?
-  // TODO more sanity checks ?
-
   return header;
 }
 
Index: source/Plugins/Process/minidump/MinidumpParser.h
===================================================================
--- source/Plugins/Process/minidump/MinidumpParser.h
+++ source/Plugins/Process/minidump/MinidumpParser.h
@@ -89,14 +89,16 @@
 
   llvm::Optional<MemoryRegionInfo> GetMemoryRegionInfo(lldb::addr_t);
 
+  // Perform consistency checks and initialize internal data structures
+  Status Initialize();
+
+private:
+  MinidumpParser(const lldb::DataBufferSP &data_buf_sp);
+
 private:
   lldb::DataBufferSP m_data_sp;
-  const MinidumpHeader *m_header;
+  const MinidumpHeader *m_header = nullptr;
   llvm::DenseMap<uint32_t, MinidumpLocationDescriptor> m_directory_map;
-
-  MinidumpParser(
-      const lldb::DataBufferSP &data_buf_sp, const MinidumpHeader *header,
-      llvm::DenseMap<uint32_t, MinidumpLocationDescriptor> &&directory_map);
 };
 
 } // end namespace minidump
Index: source/Plugins/Process/minidump/MinidumpParser.cpp
===================================================================
--- source/Plugins/Process/minidump/MinidumpParser.cpp
+++ source/Plugins/Process/minidump/MinidumpParser.cpp
@@ -14,10 +14,14 @@
 
 // Other libraries and framework includes
 #include "lldb/Target/MemoryRegionInfo.h"
+#include "lldb/Utility/LLDBAssert.h"
 
 // C includes
 // C++ includes
+#include <algorithm>
 #include <map>
+#include <unordered_set>
+#include <vector>
 
 using namespace lldb_private;
 using namespace minidump;
@@ -27,47 +31,11 @@
   if (data_buf_sp->GetByteSize() < sizeof(MinidumpHeader)) {
     return llvm::None;
   }
-
-  llvm::ArrayRef<uint8_t> header_data(data_buf_sp->GetBytes(),
-                                      sizeof(MinidumpHeader));
-  const MinidumpHeader *header = MinidumpHeader::Parse(header_data);
-
-  if (header == nullptr) {
-    return llvm::None;
-  }
-
-  lldb::offset_t directory_list_offset = header->stream_directory_rva;
-  // check if there is enough data for the parsing of the directory list
-  if ((directory_list_offset +
-       sizeof(MinidumpDirectory) * header->streams_count) >
-      data_buf_sp->GetByteSize()) {
-    return llvm::None;
-  }
-
-  const MinidumpDirectory *directory = nullptr;
-  Status error;
-  llvm::ArrayRef<uint8_t> directory_data(
-      data_buf_sp->GetBytes() + directory_list_offset,
-      sizeof(MinidumpDirectory) * header->streams_count);
-  llvm::DenseMap<uint32_t, MinidumpLocationDescriptor> directory_map;
-
-  for (uint32_t i = 0; i < header->streams_count; ++i) {
-    error = consumeObject(directory_data, directory);
-    if (error.Fail()) {
-      return llvm::None;
-    }
-    directory_map[static_cast<const uint32_t>(directory->stream_type)] =
-        directory->location;
-  }
-
-  return MinidumpParser(data_buf_sp, header, std::move(directory_map));
+  return MinidumpParser(data_buf_sp);
 }
 
-MinidumpParser::MinidumpParser(
-    const lldb::DataBufferSP &data_buf_sp, const MinidumpHeader *header,
-    llvm::DenseMap<uint32_t, MinidumpLocationDescriptor> &&directory_map)
-    : m_data_sp(data_buf_sp), m_header(header), m_directory_map(directory_map) {
-}
+MinidumpParser::MinidumpParser(const lldb::DataBufferSP &data_buf_sp)
+    : m_data_sp(data_buf_sp) {}
 
 llvm::ArrayRef<uint8_t> MinidumpParser::GetData() {
   return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes(),
@@ -396,7 +364,7 @@
   // I don't have a sense of how frequently this is called or how many memory
   // ranges a Minidump typically has, so I'm not sure if searching for the
   // appropriate range linearly each time is stupid.  Perhaps we should build
-  // an index for faster lookups.
+  // an i for faster lookups.
   llvm::Optional<minidump::Range> range = FindMemoryRange(addr);
   if (!range)
     return {};
@@ -481,3 +449,110 @@
   // appear truncated.
   return info;
 }
+
+Status MinidumpParser::Initialize() {
+  Status result;
+
+  lldbassert(m_header == nullptr);
+  lldbassert(m_directory_map.empty());
+
+  llvm::ArrayRef<uint8_t> header_data(m_data_sp->GetBytes(),
+                                      sizeof(MinidumpHeader));
+  m_header = MinidumpHeader::Parse(header_data);
+  if (m_header == nullptr) {
+    result.SetErrorString("Invalid minidump: can't parse the header");
+    return result;
+  }
+
+  // A minidump without at least one stream is clearly ill-formed
+  if (m_header->streams_count == 0) {
+    result.SetErrorString("Invalid minidump: no streams present");
+    return result;
+  }
+
+  struct FileRange {
+    uint32_t offset = 0;
+    uint32_t size = 0;
+
+    FileRange(uint32_t offset, uint32_t size) : offset(offset), size(size) {}
+    uint32_t end() const { return offset + size; }
+  };
+
+  const uint32_t file_size = m_data_sp->GetByteSize();
+
+  // Build a global minidump file map, checking for:
+  // - overlapping streams/data structures
+  // - truncation (streams pointing past the end of file)
+  std::vector<FileRange> minidump_map;
+
+  // Add the minidump header to the file map
+  if (sizeof(MinidumpHeader) > file_size) {
+    result.SetErrorString("Invalid minidump: truncated header");
+    return result;
+  }
+  minidump_map.emplace_back( 0, sizeof(MinidumpHeader) );
+
+  // Add the directory entries to the file map
+  FileRange directory_range(m_header->stream_directory_rva,
+                            m_header->streams_count *
+                                sizeof(MinidumpDirectory));
+  if (directory_range.end() > file_size) {
+    result.SetErrorString("Invalid minidump: truncated streams directory");
+    return result;
+  }
+  minidump_map.push_back(directory_range);
+
+  // Parse stream directory entries
+  llvm::ArrayRef<uint8_t> directory_data(
+      m_data_sp->GetBytes() + directory_range.offset, directory_range.size);
+  for (uint32_t i = 0; i < m_header->streams_count; ++i) {
+    const MinidumpDirectory *directory_entry = nullptr;
+    result = consumeObject(directory_data, directory_entry);
+    if (result.Fail())
+      return result;
+    if (directory_entry->stream_type == 0) {
+      // Ignore dummy streams (technically ill-formed, but a number of
+      // existing minidumps seem to contain such streams)
+      if (directory_entry->location.data_size == 0)
+        continue;
+      result.SetErrorString("Invalid minidump: bad stream type");
+      return result;
+    }
+    // Update the streams map, checking for duplicate stream types
+    if (!m_directory_map
+             .insert({directory_entry->stream_type, directory_entry->location})
+             .second) {
+      result.SetErrorString("Invalid minidump: duplicate stream type");
+      return result;
+    }
+    // Ignore the zero-length streams for layout checks
+    if (directory_entry->location.data_size != 0) {
+      minidump_map.emplace_back(directory_entry->location.rva,
+                                directory_entry->location.data_size);
+    }
+  }
+
+  // Sort the file map ranges by start offset
+  std::sort(minidump_map.begin(), minidump_map.end(),
+            [](const FileRange &a, const FileRange &b) {
+              return a.offset < b.offset;
+            });
+
+  // Check for overlapping streams/data structures
+  for (size_t i = 1; i < minidump_map.size(); ++i) {
+    const auto &prev_range = minidump_map[i - 1];
+    if (prev_range.end() > minidump_map[i].offset) {
+      result.SetErrorString("Invalid minidump: overlapping streams");
+      return result;
+    }
+  }
+
+  // Check for streams past the end of file
+  const auto &last_range = minidump_map.back();
+  if (last_range.end() > file_size) {
+    result.SetErrorString("Invalid minidump: truncated stream");
+    return result;
+  }
+
+  return result;
+}
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to