https://github.com/JDevlieghere created 
https://github.com/llvm/llvm-project/pull/153494

This PR adds support for parsing data symbols from the WebAssembly name section.

>From e68b6d31ab7ba418b23c7289e12d0befeb07f6d6 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jo...@devlieghere.com>
Date: Wed, 13 Aug 2025 13:44:35 -0700
Subject: [PATCH] [lldb] Support parsing data symbols from the Wasm name
 section

This PR adds support for parsing data symbols from the WebAssembly name
section.
---
 .../ObjectFile/wasm/ObjectFileWasm.cpp        | 114 ++++++++++++++----
 1 file changed, 92 insertions(+), 22 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp 
b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index 919cc21c32ffd..b3144f28f4913 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -251,11 +251,11 @@ bool ObjectFileWasm::ParseHeader() {
 
 static llvm::Expected<std::vector<AddressRange>>
 ParseFunctions(SectionSP code_section_sp) {
-  DataExtractor code_section_data;
-  code_section_sp->GetSectionData(code_section_data);
+  DataExtractor data;
+  code_section_sp->GetSectionData(data);
   lldb::offset_t offset = 0;
 
-  const uint64_t function_count = code_section_data.GetULEB128(&offset);
+  const uint64_t function_count = data.GetULEB128(&offset);
   if (function_count > std::numeric_limits<uint32_t>::max())
     return llvm::createStringError("function count overflows uint32_t");
 
@@ -263,7 +263,7 @@ ParseFunctions(SectionSP code_section_sp) {
   functions.reserve(function_count);
 
   for (uint32_t i = 0; i < function_count; ++i) {
-    const uint64_t function_size = code_section_data.GetULEB128(&offset);
+    const uint64_t function_size = data.GetULEB128(&offset);
     if (function_size > std::numeric_limits<uint32_t>::max())
       return llvm::createStringError("function size overflows uint32_t");
     // llvm-objdump considers the ULEB with the function size to be part of the
@@ -281,9 +281,45 @@ ParseFunctions(SectionSP code_section_sp) {
   return functions;
 }
 
+static llvm::Expected<std::vector<AddressRange>>
+ParseData(SectionSP data_section_sp) {
+  DataExtractor data;
+  data_section_sp->GetSectionData(data);
+
+  lldb::offset_t offset = 0;
+
+  const uint64_t segment_count = data.GetULEB128(&offset);
+  if (segment_count > std::numeric_limits<uint32_t>::max())
+    return llvm::createStringError("segment count overflows uint32_t");
+
+  std::vector<AddressRange> segments;
+  segments.reserve(segment_count);
+
+  for (uint32_t i = 0; i < segment_count; ++i) {
+    const uint64_t flags = data.GetULEB128(&offset);
+    if (flags > std::numeric_limits<uint32_t>::max())
+      return llvm::createStringError("segment flags overflows uint32_t");
+
+    const uint64_t segment_size = data.GetULEB128(&offset);
+    if (flags > std::numeric_limits<uint32_t>::max())
+      return llvm::createStringError("segment size overflows uint32_t");
+
+    segments.emplace_back(data_section_sp, offset, segment_size);
+
+    std::optional<lldb::offset_t> next_offset =
+        llvm::checkedAddUnsigned(offset, segment_size);
+    if (!next_offset)
+      return llvm::createStringError("segment offset overflows uint64_t");
+    offset = *next_offset;
+  }
+
+  return segments;
+}
+
 static llvm::Expected<std::vector<Symbol>>
 ParseNames(SectionSP name_section_sp,
-           const std::vector<AddressRange> &functions) {
+           const std::vector<AddressRange> &function_ranges,
+           const std::vector<AddressRange> &segment_ranges) {
   DataExtractor name_section_data;
   name_section_sp->GetSectionData(name_section_data);
 
@@ -305,17 +341,34 @@ ParseNames(SectionSP name_section_sp,
       for (uint64_t i = 0; c && i < count; ++i) {
         const uint64_t idx = data.getULEB128(c);
         const std::optional<std::string> name = GetWasmString(data, c);
-        if (!name || idx >= functions.size())
+        if (!name || idx >= function_ranges.size())
           continue;
         symbols.emplace_back(
             symbols.size(), Mangled(*name), lldb::eSymbolTypeCode,
             /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
-            /*is_artificial=*/false, functions[idx],
+            /*is_artificial=*/false, function_ranges[idx],
             /*size_is_valid=*/true, /*contains_linker_annotations=*/false,
             /*flags=*/0);
       }
     } break;
-    case llvm::wasm::WASM_NAMES_DATA_SEGMENT:
+    case llvm::wasm::WASM_NAMES_DATA_SEGMENT: {
+      const uint64_t count = data.getULEB128(c);
+      if (count > std::numeric_limits<uint32_t>::max())
+        return llvm::createStringError("data count overflows uint32_t");
+      for (uint64_t i = 0; c && i < count; ++i) {
+        const uint64_t idx = data.getULEB128(c);
+        const std::optional<std::string> name = GetWasmString(data, c);
+        if (!name || idx >= segment_ranges.size())
+          continue;
+        symbols.emplace_back(
+            symbols.size(), Mangled(*name), lldb::eSymbolTypeData,
+            /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
+            /*is_artificial=*/false, segment_ranges[idx],
+            /*size_is_valid=*/true, /*contains_linker_annotations=*/false,
+            /*flags=*/0);
+      }
+
+    } break;
     case llvm::wasm::WASM_NAMES_GLOBAL:
     case llvm::wasm::WASM_NAMES_LOCAL:
     default:
@@ -336,21 +389,35 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
   assert(m_sections_up && "sections must be parsed");
   Log *log = GetLog(LLDBLog::Object);
 
-  // The name section contains names and indexes. First parse the functions 
from
-  // the code section so we can access them by their index.
-  SectionSP code_section_sp =
-      m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false);
-  if (!code_section_sp) {
-    LLDB_LOG(log, "Failed to parse Wasm symbol table: no functions section");
-    return;
+  // The name section contains names and indexes. First parse the data from the
+  // relevant sections so we can access it by its index.
+  std::vector<AddressRange> function_ranges;
+  std::vector<AddressRange> segment_ranges;
+
+  // Parse the code section.
+  if (SectionSP code_section_sp =
+          m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false)) {
+    llvm::Expected<std::vector<AddressRange>> functions =
+        ParseFunctions(code_section_sp);
+    if (!functions) {
+      LLDB_LOG_ERROR(log, functions.takeError(),
+                     "Failed to parse Wasm code section: {0}");
+      return;
+    }
+    function_ranges = *functions;
   }
 
-  llvm::Expected<std::vector<AddressRange>> functions =
-      ParseFunctions(code_section_sp);
-  if (!functions) {
-    LLDB_LOG_ERROR(log, functions.takeError(),
-                   "Failed to parse Wasm functions: {0}");
-    return;
+  // Parse the data section.
+  if (SectionSP data_section_sp =
+          m_sections_up->FindSectionByType(lldb::eSectionTypeData, false)) {
+    llvm::Expected<std::vector<AddressRange>> segments =
+        ParseData(data_section_sp);
+    if (!segments) {
+      LLDB_LOG_ERROR(log, segments.takeError(),
+                     "Failed to parse Wasm data section: {0}");
+      return;
+    }
+    segment_ranges = *segments;
   }
 
   // Parse the name section.
@@ -362,7 +429,7 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
   }
 
   llvm::Expected<std::vector<Symbol>> symbols =
-      ParseNames(name_section_sp, *functions);
+      ParseNames(name_section_sp, function_ranges, segment_ranges);
   if (!symbols) {
     LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: 
{0}");
     return;
@@ -408,6 +475,9 @@ void ObjectFileWasm::CreateSections(SectionList 
&unified_section_list) {
       // For this reason Section::GetFileAddress() must return zero for the
       // Code section.
       vm_addr = 0;
+    } else if (llvm::wasm::WASM_SEC_DATA == sect_info.id) {
+      section_type = eSectionTypeData;
+      section_name = ConstString("data");
     } else {
       section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
       if (section_type == eSectionTypeOther)

_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to