Author: Jonas Devlieghere
Date: 2025-08-19T16:00:31-07:00
New Revision: 7cd61793edbb4e6dbc0fb92e5d6d85cabfa62b40

URL: 
https://github.com/llvm/llvm-project/commit/7cd61793edbb4e6dbc0fb92e5d6d85cabfa62b40
DIFF: 
https://github.com/llvm/llvm-project/commit/7cd61793edbb4e6dbc0fb92e5d6d85cabfa62b40.diff

LOG: [lldb] Improve error handling in ObjectFileWasm (#154433)

Improve error handling in ObjectFileWasm by using helpers that wrap
their result in an llvm::Expected. The helper to read a Wasm string now
return an Expected<std::string> and I created a helper to parse 32-bit
ULEBs that returns an Expected<uint32_t>.

Added: 
    

Modified: 
    lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp

Removed: 
    


################################################################################
diff  --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp 
b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index a000b34fbb549..777b20e9bb0f6 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -36,6 +36,41 @@ LLDB_PLUGIN_DEFINE(ObjectFileWasm)
 static const uint32_t kWasmHeaderSize =
     sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);
 
+/// Helper to read a 32-bit ULEB using LLDB's DataExtractor.
+static inline llvm::Expected<uint32_t> GetULEB32(DataExtractor &data,
+                                                 lldb::offset_t &offset) {
+  const uint64_t value = data.GetULEB128(&offset);
+  if (value > std::numeric_limits<uint32_t>::max())
+    return llvm::createStringError("ULEB exceeds 32 bits");
+  return value;
+}
+
+/// Helper to read a 32-bit ULEB using LLVM's DataExtractor.
+static inline llvm::Expected<uint32_t>
+GetULEB32(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
+  const uint64_t value = data.getULEB128(c);
+  if (!c)
+    return c.takeError();
+  if (value > std::numeric_limits<uint32_t>::max())
+    return llvm::createStringError("ULEB exceeds 32 bits");
+  return value;
+}
+
+/// Helper to read a Wasm string, whcih is encoded as a vector of UTF-8 codes.
+static inline llvm::Expected<std::string>
+GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
+  llvm::Expected<uint32_t> len = GetULEB32(data, c);
+  if (!len)
+    return len.takeError();
+
+  llvm::SmallVector<uint8_t, 32> str_storage;
+  data.getU8(c, str_storage, *len);
+  if (!c)
+    return c.takeError();
+
+  return std::string(toStringRef(llvm::ArrayRef(str_storage)));
+}
+
 /// Checks whether the data buffer starts with a valid Wasm module header.
 static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
   if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize)
@@ -51,32 +86,6 @@ static bool ValidateModuleHeader(const DataBufferSP 
&data_sp) {
   return version == llvm::wasm::WasmVersion;
 }
 
-// FIXME: Use lldb::DataExtractor instead of llvm::DataExtractor.
-static std::optional<std::string>
-GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
-  // A Wasm string is encoded as a vector of UTF-8 codes.
-  // Vectors are encoded with their u32 length followed by the element
-  // sequence.
-  uint64_t len = data.getULEB128(c);
-  if (!c) {
-    consumeError(c.takeError());
-    return std::nullopt;
-  }
-
-  if (len > std::numeric_limits<uint32_t>::max()) {
-    return std::nullopt;
-  }
-
-  llvm::SmallVector<uint8_t, 32> str_storage;
-  data.getU8(c, str_storage, len);
-  if (!c) {
-    consumeError(c.takeError());
-    return std::nullopt;
-  }
-
-  return std::string(toStringRef(llvm::ArrayRef(str_storage)));
-}
-
 char ObjectFileWasm::ID;
 
 void ObjectFileWasm::Initialize() {
@@ -183,9 +192,12 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t 
*offset_ptr) {
     // identifying the custom section, followed by an uninterpreted sequence
     // of bytes.
     lldb::offset_t prev_offset = c.tell();
-    std::optional<std::string> sect_name = GetWasmString(data, c);
-    if (!sect_name)
+    llvm::Expected<std::string> sect_name = GetWasmString(data, c);
+    if (!sect_name) {
+      LLDB_LOG_ERROR(GetLog(LLDBLog::Object), sect_name.takeError(),
+                     "failed to parse section name: {0}");
       return false;
+    }
 
     if (payload_len < c.tell() - prev_offset)
       return false;
@@ -255,26 +267,26 @@ ParseFunctions(SectionSP code_section_sp) {
   code_section_sp->GetSectionData(data);
   lldb::offset_t offset = 0;
 
-  const uint64_t function_count = data.GetULEB128(&offset);
-  if (function_count > std::numeric_limits<uint32_t>::max())
-    return llvm::createStringError("function count overflows uint32_t");
+  llvm::Expected<uint32_t> function_count = GetULEB32(data, offset);
+  if (!function_count)
+    return function_count.takeError();
 
   std::vector<AddressRange> functions;
-  functions.reserve(function_count);
+  functions.reserve(*function_count);
 
-  for (uint32_t i = 0; i < function_count; ++i) {
-    const uint64_t function_size = data.GetULEB128(&offset);
-    if (function_size > std::numeric_limits<uint32_t>::max())
-      return llvm::createStringError("function size overflows uint32_t");
+  for (uint32_t i = 0; i < *function_count; ++i) {
+    llvm::Expected<uint32_t> function_size = GetULEB32(data, offset);
+    if (!function_size)
+      return function_size.takeError();
     // llvm-objdump considers the ULEB with the function size to be part of the
     // function. We can't do that here because that would break symbolic
     // breakpoints, as that address is never executed.
-    functions.emplace_back(code_section_sp, offset, function_size);
+    functions.emplace_back(code_section_sp, offset, *function_size);
 
     std::optional<lldb::offset_t> next_offset =
-        llvm::checkedAddUnsigned(offset, function_size);
+        llvm::checkedAddUnsigned<lldb::offset_t>(offset, *function_size);
     if (!next_offset)
-      return llvm::createStringError("function offset overflows uint64_t");
+      return llvm::createStringError("function offset overflows 64 bits");
     offset = *next_offset;
   }
 
@@ -295,44 +307,44 @@ ParseData(SectionSP data_section_sp) {
 
   lldb::offset_t offset = 0;
 
-  const uint64_t segment_count = data.GetULEB128(&offset);
-  if (segment_count > std::numeric_limits<uint32_t>::max())
-    return llvm::createStringError("segment count overflows uint32_t");
+  llvm::Expected<uint32_t> segment_count = GetULEB32(data, offset);
+  if (!segment_count)
+    return segment_count.takeError();
 
   std::vector<WasmSegment> segments;
-  segments.reserve(segment_count);
+  segments.reserve(*segment_count);
 
-  for (uint32_t i = 0; i < segment_count; ++i) {
-    const uint64_t flags = data.GetULEB128(&offset);
-    if (flags > std::numeric_limits<uint32_t>::max())
-      return llvm::createStringError("segment flags overflows uint32_t");
+  for (uint32_t i = 0; i < *segment_count; ++i) {
+    llvm::Expected<uint32_t> flags = GetULEB32(data, offset);
+    if (!flags)
+      return flags.takeError();
 
     // Data segments have a mode that identifies them as either passive or
     // active. An active data segment copies its contents into a memory during
     // instantiation, as specified by a memory index and a constant expression
     // defining an offset into that memory.
-    if (flags & llvm::wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) {
-      const uint64_t memidx = data.GetULEB128(&offset);
-      if (memidx > std::numeric_limits<uint32_t>::max())
-        return llvm::createStringError("memidx overflows uint32_t");
+    if (*flags & llvm::wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) {
+      llvm::Expected<uint32_t> memidx = GetULEB32(data, offset);
+      if (!memidx)
+        return memidx.takeError();
     }
 
-    if ((flags & llvm::wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) {
+    if ((*flags & llvm::wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) {
       // Skip over the constant expression.
       for (uint8_t b = 0; b != llvm::wasm::WASM_OPCODE_END;)
         b = data.GetU8(&offset);
     }
 
-    const uint64_t segment_size = data.GetULEB128(&offset);
-    if (segment_size > std::numeric_limits<uint32_t>::max())
-      return llvm::createStringError("segment size overflows uint32_t");
+    llvm::Expected<uint32_t> segment_size = GetULEB32(data, offset);
+    if (!segment_size)
+      return segment_size.takeError();
 
-    segments.emplace_back(data_section_sp, offset, segment_size);
+    segments.emplace_back(data_section_sp, offset, *segment_size);
 
     std::optional<lldb::offset_t> next_offset =
-        llvm::checkedAddUnsigned(offset, segment_size);
+        llvm::checkedAddUnsigned<lldb::offset_t>(offset, *segment_size);
     if (!next_offset)
-      return llvm::createStringError("segment offset overflows uint64_t");
+      return llvm::createStringError("segment offset overflows 64 bits");
     offset = *next_offset;
   }
 
@@ -351,9 +363,9 @@ ParseNames(SectionSP name_section_sp,
   std::vector<Symbol> symbols;
   while (c && c.tell() < data.size()) {
     const uint8_t type = data.getU8(c);
-    const uint64_t size = data.getULEB128(c);
-    if (size > std::numeric_limits<uint32_t>::max())
-      return llvm::createStringError("size overflows uint32_t");
+    llvm::Expected<uint32_t> size = GetULEB32(data, c);
+    if (!size)
+      return size.takeError();
 
     switch (type) {
     case llvm::wasm::WASM_NAMES_FUNCTION: {
@@ -362,26 +374,34 @@ ParseNames(SectionSP name_section_sp,
         return llvm::createStringError("function count overflows uint32_t");
 
       for (uint64_t i = 0; c && i < count; ++i) {
-        const uint64_t idx = data.getULEB128(c);
-        const std::optional<std::string> name = GetWasmString(data, c);
-        if (!name || idx >= function_ranges.size())
+        llvm::Expected<uint32_t> idx = GetULEB32(data, c);
+        if (!idx)
+          return idx.takeError();
+        llvm::Expected<std::string> name = GetWasmString(data, c);
+        if (!name)
+          return name.takeError();
+        if (*idx >= function_ranges.size())
           continue;
         symbols.emplace_back(
             symbols.size(), Mangled(*name), lldb::eSymbolTypeCode,
             /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
-            /*is_artificial=*/false, function_ranges[idx],
+            /*is_artificial=*/false, function_ranges[*idx],
             /*size_is_valid=*/true, /*contains_linker_annotations=*/false,
             /*flags=*/0);
       }
     } break;
     case llvm::wasm::WASM_NAMES_DATA_SEGMENT: {
-      const uint64_t count = data.getULEB128(c);
-      if (count > std::numeric_limits<uint32_t>::max())
-        return llvm::createStringError("data count overflows uint32_t");
-      for (uint64_t i = 0; c && i < count; ++i) {
-        const uint64_t idx = data.getULEB128(c);
-        const std::optional<std::string> name = GetWasmString(data, c);
-        if (!name || idx >= segments.size())
+      llvm::Expected<uint32_t> count = GetULEB32(data, c);
+      if (!count)
+        return count.takeError();
+      for (uint32_t i = 0; c && i < *count; ++i) {
+        llvm::Expected<uint32_t> idx = GetULEB32(data, c);
+        if (!idx)
+          return idx.takeError();
+        llvm::Expected<std::string> name = GetWasmString(data, c);
+        if (!name)
+          return name.takeError();
+        if (*idx >= segments.size())
           continue;
         // Update the segment name.
         segments[i].name = *name;
@@ -397,9 +417,10 @@ ParseNames(SectionSP name_section_sp,
     case llvm::wasm::WASM_NAMES_GLOBAL:
     case llvm::wasm::WASM_NAMES_LOCAL:
     default:
-      std::optional<uint64_t> offset = llvm::checkedAddUnsigned(c.tell(), 
size);
+      std::optional<lldb::offset_t> offset =
+          llvm::checkedAddUnsigned<lldb::offset_t>(c.tell(), *size);
       if (!offset)
-        return llvm::createStringError("offset overflows uint64_t");
+        return llvm::createStringError("offset overflows 64 bits");
       c.seek(*offset);
     }
   }
@@ -636,11 +657,15 @@ std::optional<FileSpec> 
ObjectFileWasm::GetExternalDebugInfoFileSpec() {
       const uint32_t kBufferSize = 1024;
       DataExtractor section_header_data =
           ReadImageData(sect_info.offset, kBufferSize);
+
       llvm::DataExtractor data = section_header_data.GetAsLLVM();
       llvm::DataExtractor::Cursor c(0);
-      std::optional<std::string> symbols_url = GetWasmString(data, c);
-      if (symbols_url)
-        return FileSpec(*symbols_url);
+      llvm::Expected<std::string> symbols_url = GetWasmString(data, c);
+      if (!symbols_url) {
+        llvm::consumeError(symbols_url.takeError());
+        return std::nullopt;
+      }
+      return FileSpec(*symbols_url);
     }
   }
   return std::nullopt;


        
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to