Author: Jonas Devlieghere Date: 2025-08-19T16:00:31-07:00 New Revision: 7cd61793edbb4e6dbc0fb92e5d6d85cabfa62b40
URL: https://github.com/llvm/llvm-project/commit/7cd61793edbb4e6dbc0fb92e5d6d85cabfa62b40 DIFF: https://github.com/llvm/llvm-project/commit/7cd61793edbb4e6dbc0fb92e5d6d85cabfa62b40.diff LOG: [lldb] Improve error handling in ObjectFileWasm (#154433) Improve error handling in ObjectFileWasm by using helpers that wrap their result in an llvm::Expected. The helper to read a Wasm string now return an Expected<std::string> and I created a helper to parse 32-bit ULEBs that returns an Expected<uint32_t>. Added: Modified: lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp Removed: ################################################################################ diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp index a000b34fbb549..777b20e9bb0f6 100644 --- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp +++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp @@ -36,6 +36,41 @@ LLDB_PLUGIN_DEFINE(ObjectFileWasm) static const uint32_t kWasmHeaderSize = sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion); +/// Helper to read a 32-bit ULEB using LLDB's DataExtractor. +static inline llvm::Expected<uint32_t> GetULEB32(DataExtractor &data, + lldb::offset_t &offset) { + const uint64_t value = data.GetULEB128(&offset); + if (value > std::numeric_limits<uint32_t>::max()) + return llvm::createStringError("ULEB exceeds 32 bits"); + return value; +} + +/// Helper to read a 32-bit ULEB using LLVM's DataExtractor. +static inline llvm::Expected<uint32_t> +GetULEB32(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { + const uint64_t value = data.getULEB128(c); + if (!c) + return c.takeError(); + if (value > std::numeric_limits<uint32_t>::max()) + return llvm::createStringError("ULEB exceeds 32 bits"); + return value; +} + +/// Helper to read a Wasm string, whcih is encoded as a vector of UTF-8 codes. +static inline llvm::Expected<std::string> +GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { + llvm::Expected<uint32_t> len = GetULEB32(data, c); + if (!len) + return len.takeError(); + + llvm::SmallVector<uint8_t, 32> str_storage; + data.getU8(c, str_storage, *len); + if (!c) + return c.takeError(); + + return std::string(toStringRef(llvm::ArrayRef(str_storage))); +} + /// Checks whether the data buffer starts with a valid Wasm module header. static bool ValidateModuleHeader(const DataBufferSP &data_sp) { if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize) @@ -51,32 +86,6 @@ static bool ValidateModuleHeader(const DataBufferSP &data_sp) { return version == llvm::wasm::WasmVersion; } -// FIXME: Use lldb::DataExtractor instead of llvm::DataExtractor. -static std::optional<std::string> -GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { - // A Wasm string is encoded as a vector of UTF-8 codes. - // Vectors are encoded with their u32 length followed by the element - // sequence. - uint64_t len = data.getULEB128(c); - if (!c) { - consumeError(c.takeError()); - return std::nullopt; - } - - if (len > std::numeric_limits<uint32_t>::max()) { - return std::nullopt; - } - - llvm::SmallVector<uint8_t, 32> str_storage; - data.getU8(c, str_storage, len); - if (!c) { - consumeError(c.takeError()); - return std::nullopt; - } - - return std::string(toStringRef(llvm::ArrayRef(str_storage))); -} - char ObjectFileWasm::ID; void ObjectFileWasm::Initialize() { @@ -183,9 +192,12 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) { // identifying the custom section, followed by an uninterpreted sequence // of bytes. lldb::offset_t prev_offset = c.tell(); - std::optional<std::string> sect_name = GetWasmString(data, c); - if (!sect_name) + llvm::Expected<std::string> sect_name = GetWasmString(data, c); + if (!sect_name) { + LLDB_LOG_ERROR(GetLog(LLDBLog::Object), sect_name.takeError(), + "failed to parse section name: {0}"); return false; + } if (payload_len < c.tell() - prev_offset) return false; @@ -255,26 +267,26 @@ ParseFunctions(SectionSP code_section_sp) { code_section_sp->GetSectionData(data); lldb::offset_t offset = 0; - const uint64_t function_count = data.GetULEB128(&offset); - if (function_count > std::numeric_limits<uint32_t>::max()) - return llvm::createStringError("function count overflows uint32_t"); + llvm::Expected<uint32_t> function_count = GetULEB32(data, offset); + if (!function_count) + return function_count.takeError(); std::vector<AddressRange> functions; - functions.reserve(function_count); + functions.reserve(*function_count); - for (uint32_t i = 0; i < function_count; ++i) { - const uint64_t function_size = data.GetULEB128(&offset); - if (function_size > std::numeric_limits<uint32_t>::max()) - return llvm::createStringError("function size overflows uint32_t"); + for (uint32_t i = 0; i < *function_count; ++i) { + llvm::Expected<uint32_t> function_size = GetULEB32(data, offset); + if (!function_size) + return function_size.takeError(); // llvm-objdump considers the ULEB with the function size to be part of the // function. We can't do that here because that would break symbolic // breakpoints, as that address is never executed. - functions.emplace_back(code_section_sp, offset, function_size); + functions.emplace_back(code_section_sp, offset, *function_size); std::optional<lldb::offset_t> next_offset = - llvm::checkedAddUnsigned(offset, function_size); + llvm::checkedAddUnsigned<lldb::offset_t>(offset, *function_size); if (!next_offset) - return llvm::createStringError("function offset overflows uint64_t"); + return llvm::createStringError("function offset overflows 64 bits"); offset = *next_offset; } @@ -295,44 +307,44 @@ ParseData(SectionSP data_section_sp) { lldb::offset_t offset = 0; - const uint64_t segment_count = data.GetULEB128(&offset); - if (segment_count > std::numeric_limits<uint32_t>::max()) - return llvm::createStringError("segment count overflows uint32_t"); + llvm::Expected<uint32_t> segment_count = GetULEB32(data, offset); + if (!segment_count) + return segment_count.takeError(); std::vector<WasmSegment> segments; - segments.reserve(segment_count); + segments.reserve(*segment_count); - for (uint32_t i = 0; i < segment_count; ++i) { - const uint64_t flags = data.GetULEB128(&offset); - if (flags > std::numeric_limits<uint32_t>::max()) - return llvm::createStringError("segment flags overflows uint32_t"); + for (uint32_t i = 0; i < *segment_count; ++i) { + llvm::Expected<uint32_t> flags = GetULEB32(data, offset); + if (!flags) + return flags.takeError(); // Data segments have a mode that identifies them as either passive or // active. An active data segment copies its contents into a memory during // instantiation, as specified by a memory index and a constant expression // defining an offset into that memory. - if (flags & llvm::wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) { - const uint64_t memidx = data.GetULEB128(&offset); - if (memidx > std::numeric_limits<uint32_t>::max()) - return llvm::createStringError("memidx overflows uint32_t"); + if (*flags & llvm::wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) { + llvm::Expected<uint32_t> memidx = GetULEB32(data, offset); + if (!memidx) + return memidx.takeError(); } - if ((flags & llvm::wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) { + if ((*flags & llvm::wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) { // Skip over the constant expression. for (uint8_t b = 0; b != llvm::wasm::WASM_OPCODE_END;) b = data.GetU8(&offset); } - const uint64_t segment_size = data.GetULEB128(&offset); - if (segment_size > std::numeric_limits<uint32_t>::max()) - return llvm::createStringError("segment size overflows uint32_t"); + llvm::Expected<uint32_t> segment_size = GetULEB32(data, offset); + if (!segment_size) + return segment_size.takeError(); - segments.emplace_back(data_section_sp, offset, segment_size); + segments.emplace_back(data_section_sp, offset, *segment_size); std::optional<lldb::offset_t> next_offset = - llvm::checkedAddUnsigned(offset, segment_size); + llvm::checkedAddUnsigned<lldb::offset_t>(offset, *segment_size); if (!next_offset) - return llvm::createStringError("segment offset overflows uint64_t"); + return llvm::createStringError("segment offset overflows 64 bits"); offset = *next_offset; } @@ -351,9 +363,9 @@ ParseNames(SectionSP name_section_sp, std::vector<Symbol> symbols; while (c && c.tell() < data.size()) { const uint8_t type = data.getU8(c); - const uint64_t size = data.getULEB128(c); - if (size > std::numeric_limits<uint32_t>::max()) - return llvm::createStringError("size overflows uint32_t"); + llvm::Expected<uint32_t> size = GetULEB32(data, c); + if (!size) + return size.takeError(); switch (type) { case llvm::wasm::WASM_NAMES_FUNCTION: { @@ -362,26 +374,34 @@ ParseNames(SectionSP name_section_sp, return llvm::createStringError("function count overflows uint32_t"); for (uint64_t i = 0; c && i < count; ++i) { - const uint64_t idx = data.getULEB128(c); - const std::optional<std::string> name = GetWasmString(data, c); - if (!name || idx >= function_ranges.size()) + llvm::Expected<uint32_t> idx = GetULEB32(data, c); + if (!idx) + return idx.takeError(); + llvm::Expected<std::string> name = GetWasmString(data, c); + if (!name) + return name.takeError(); + if (*idx >= function_ranges.size()) continue; symbols.emplace_back( symbols.size(), Mangled(*name), lldb::eSymbolTypeCode, /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false, - /*is_artificial=*/false, function_ranges[idx], + /*is_artificial=*/false, function_ranges[*idx], /*size_is_valid=*/true, /*contains_linker_annotations=*/false, /*flags=*/0); } } break; case llvm::wasm::WASM_NAMES_DATA_SEGMENT: { - const uint64_t count = data.getULEB128(c); - if (count > std::numeric_limits<uint32_t>::max()) - return llvm::createStringError("data count overflows uint32_t"); - for (uint64_t i = 0; c && i < count; ++i) { - const uint64_t idx = data.getULEB128(c); - const std::optional<std::string> name = GetWasmString(data, c); - if (!name || idx >= segments.size()) + llvm::Expected<uint32_t> count = GetULEB32(data, c); + if (!count) + return count.takeError(); + for (uint32_t i = 0; c && i < *count; ++i) { + llvm::Expected<uint32_t> idx = GetULEB32(data, c); + if (!idx) + return idx.takeError(); + llvm::Expected<std::string> name = GetWasmString(data, c); + if (!name) + return name.takeError(); + if (*idx >= segments.size()) continue; // Update the segment name. segments[i].name = *name; @@ -397,9 +417,10 @@ ParseNames(SectionSP name_section_sp, case llvm::wasm::WASM_NAMES_GLOBAL: case llvm::wasm::WASM_NAMES_LOCAL: default: - std::optional<uint64_t> offset = llvm::checkedAddUnsigned(c.tell(), size); + std::optional<lldb::offset_t> offset = + llvm::checkedAddUnsigned<lldb::offset_t>(c.tell(), *size); if (!offset) - return llvm::createStringError("offset overflows uint64_t"); + return llvm::createStringError("offset overflows 64 bits"); c.seek(*offset); } } @@ -636,11 +657,15 @@ std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() { const uint32_t kBufferSize = 1024; DataExtractor section_header_data = ReadImageData(sect_info.offset, kBufferSize); + llvm::DataExtractor data = section_header_data.GetAsLLVM(); llvm::DataExtractor::Cursor c(0); - std::optional<std::string> symbols_url = GetWasmString(data, c); - if (symbols_url) - return FileSpec(*symbols_url); + llvm::Expected<std::string> symbols_url = GetWasmString(data, c); + if (!symbols_url) { + llvm::consumeError(symbols_url.takeError()); + return std::nullopt; + } + return FileSpec(*symbols_url); } } return std::nullopt; _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits