llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lldb

Author: Jonas Devlieghere (JDevlieghere)

<details>
<summary>Changes</summary>

This PR adds support for parsing data symbols from the WebAssembly name section.

---
Full diff: https://github.com/llvm/llvm-project/pull/153494.diff


3 Files Affected:

- (modified) lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp (+92-22) 
- (modified) lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml (+78-19) 
- (modified) lldb/test/Shell/Symtab/symtab-wasm.test (+6-4) 


``````````diff
diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index 919cc21c32ffd..b3144f28f4913 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -251,11 +251,11 @@ bool ObjectFileWasm::ParseHeader() {
 
 static llvm::Expected<std::vector<AddressRange>>
 ParseFunctions(SectionSP code_section_sp) {
-  DataExtractor code_section_data;
-  code_section_sp->GetSectionData(code_section_data);
+  DataExtractor data;
+  code_section_sp->GetSectionData(data);
   lldb::offset_t offset = 0;
 
-  const uint64_t function_count = code_section_data.GetULEB128(&offset);
+  const uint64_t function_count = data.GetULEB128(&offset);
   if (function_count > std::numeric_limits<uint32_t>::max())
     return llvm::createStringError("function count overflows uint32_t");
 
@@ -263,7 +263,7 @@ ParseFunctions(SectionSP code_section_sp) {
   functions.reserve(function_count);
 
   for (uint32_t i = 0; i < function_count; ++i) {
-    const uint64_t function_size = code_section_data.GetULEB128(&offset);
+    const uint64_t function_size = data.GetULEB128(&offset);
     if (function_size > std::numeric_limits<uint32_t>::max())
       return llvm::createStringError("function size overflows uint32_t");
     // llvm-objdump considers the ULEB with the function size to be part of the
@@ -281,9 +281,45 @@ ParseFunctions(SectionSP code_section_sp) {
   return functions;
 }
 
+static llvm::Expected<std::vector<AddressRange>>
+ParseData(SectionSP data_section_sp) {
+  DataExtractor data;
+  data_section_sp->GetSectionData(data);
+  // Walk the data section and record one AddressRange per segment.
+  lldb::offset_t offset = 0;
+
+  const uint64_t segment_count = data.GetULEB128(&offset);
+  if (segment_count > std::numeric_limits<uint32_t>::max())
+    return llvm::createStringError("segment count overflows uint32_t");
+
+  std::vector<AddressRange> segments;
+  segments.reserve(segment_count);
+
+  for (uint32_t i = 0; i < segment_count; ++i) {
+    const uint64_t flags = data.GetULEB128(&offset);
+    if (flags > std::numeric_limits<uint32_t>::max())
+      return llvm::createStringError("segment flags overflows uint32_t");
+    // NOTE(review): active segments carry an offset expr before this; verify.
+    const uint64_t segment_size = data.GetULEB128(&offset);
+    if (segment_size > std::numeric_limits<uint32_t>::max())
+      return llvm::createStringError("segment size overflows uint32_t");
+    // The range begins at the current offset, just past the size ULEB128.
+    segments.emplace_back(data_section_sp, offset, segment_size);
+    // Advance past the segment bytes, rejecting offsets that wrap uint64_t.
+    std::optional<lldb::offset_t> next_offset =
+        llvm::checkedAddUnsigned(offset, segment_size);
+    if (!next_offset)
+      return llvm::createStringError("segment offset overflows uint64_t");
+    offset = *next_offset;
+  }
+
+  return segments;
+}
+
 static llvm::Expected<std::vector<Symbol>>
 ParseNames(SectionSP name_section_sp,
-           const std::vector<AddressRange> &functions) {
+           const std::vector<AddressRange> &function_ranges,
+           const std::vector<AddressRange> &segment_ranges) {
   DataExtractor name_section_data;
   name_section_sp->GetSectionData(name_section_data);
 
@@ -305,17 +341,34 @@ ParseNames(SectionSP name_section_sp,
       for (uint64_t i = 0; c && i < count; ++i) {
         const uint64_t idx = data.getULEB128(c);
         const std::optional<std::string> name = GetWasmString(data, c);
-        if (!name || idx >= functions.size())
+        if (!name || idx >= function_ranges.size())
           continue;
         symbols.emplace_back(
             symbols.size(), Mangled(*name), lldb::eSymbolTypeCode,
             /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
-            /*is_artificial=*/false, functions[idx],
+            /*is_artificial=*/false, function_ranges[idx],
             /*size_is_valid=*/true, /*contains_linker_annotations=*/false,
             /*flags=*/0);
       }
     } break;
-    case llvm::wasm::WASM_NAMES_DATA_SEGMENT:
+    case llvm::wasm::WASM_NAMES_DATA_SEGMENT: {
+      const uint64_t count = data.getULEB128(c);
+      if (count > std::numeric_limits<uint32_t>::max())
+        return llvm::createStringError("data count overflows uint32_t");
+      for (uint64_t i = 0; c && i < count; ++i) {
+        const uint64_t idx = data.getULEB128(c);
+        const std::optional<std::string> name = GetWasmString(data, c);
+        if (!name || idx >= segment_ranges.size())
+          continue;
+        symbols.emplace_back(
+            symbols.size(), Mangled(*name), lldb::eSymbolTypeData,
+            /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
+            /*is_artificial=*/false, segment_ranges[idx],
+            /*size_is_valid=*/true, /*contains_linker_annotations=*/false,
+            /*flags=*/0);
+      }
+
+    } break;
     case llvm::wasm::WASM_NAMES_GLOBAL:
     case llvm::wasm::WASM_NAMES_LOCAL:
     default:
@@ -336,21 +389,35 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
   assert(m_sections_up && "sections must be parsed");
   Log *log = GetLog(LLDBLog::Object);
 
-  // The name section contains names and indexes. First parse the functions from
-  // the code section so we can access them by their index.
-  SectionSP code_section_sp =
-      m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false);
-  if (!code_section_sp) {
-    LLDB_LOG(log, "Failed to parse Wasm symbol table: no functions section");
-    return;
+  // The name section contains names and indexes. First parse the data from the
+  // relevant sections so we can access it by its index.
+  std::vector<AddressRange> function_ranges;
+  std::vector<AddressRange> segment_ranges;
+
+  // Parse the code section.
+  if (SectionSP code_section_sp =
+          m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false)) {
+    llvm::Expected<std::vector<AddressRange>> functions =
+        ParseFunctions(code_section_sp);
+    if (!functions) {
+      LLDB_LOG_ERROR(log, functions.takeError(),
+                     "Failed to parse Wasm code section: {0}");
+      return;
+    }
+    function_ranges = *functions;
   }
 
-  llvm::Expected<std::vector<AddressRange>> functions =
-      ParseFunctions(code_section_sp);
-  if (!functions) {
-    LLDB_LOG_ERROR(log, functions.takeError(),
-                   "Failed to parse Wasm functions: {0}");
-    return;
+  // Parse the data section.
+  if (SectionSP data_section_sp =
+          m_sections_up->FindSectionByType(lldb::eSectionTypeData, false)) {
+    llvm::Expected<std::vector<AddressRange>> segments =
+        ParseData(data_section_sp);
+    if (!segments) {
+      LLDB_LOG_ERROR(log, segments.takeError(),
+                     "Failed to parse Wasm data section: {0}");
+      return;
+    }
+    segment_ranges = *segments;
   }
 
   // Parse the name section.
@@ -362,7 +429,7 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
   }
 
   llvm::Expected<std::vector<Symbol>> symbols =
-      ParseNames(name_section_sp, *functions);
+      ParseNames(name_section_sp, function_ranges, segment_ranges);
   if (!symbols) {
     LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}");
     return;
@@ -408,6 +475,9 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
       // For this reason Section::GetFileAddress() must return zero for the
       // Code section.
       vm_addr = 0;
+    } else if (llvm::wasm::WASM_SEC_DATA == sect_info.id) {
+      section_type = eSectionTypeData;
+      section_name = ConstString("data");
     } else {
       section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
       if (section_type == eSectionTypeOther)
diff --git a/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml b/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml
index 165bb53662f40..67b04aa3cf81c 100644
--- a/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml
+++ b/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml
@@ -1,3 +1,15 @@
+# clang -target wasm32 -nostdlib -Wl,--no-entry -Wl,--export-all -O0 -g -o simple.wasm simple.c
+# char* str = "data str";
+#
+# int add(int a, int b) {
+#   return a + b;
+# }
+#
+# int main() {
+#   int i = 1;
+#   int j = 2;
+#   return add(i, j);
+# }
 --- !WASM
 FileHeader:
   Version:         0x1
@@ -37,13 +49,13 @@ Sections:
         Mutable:         true
         InitExpr:
           Opcode:          I32_CONST
-          Value:           66560
+          Value:           66576
       - Index:           1
         Type:            I32
         Mutable:         false
         InitExpr:
           Opcode:          I32_CONST
-          Value:           1024
+          Value:           1036
       - Index:           2
         Type:            I32
         Mutable:         false
@@ -55,44 +67,50 @@ Sections:
         Mutable:         false
         InitExpr:
           Opcode:          I32_CONST
-          Value:           1024
+          Value:           1040
       - Index:           4
         Type:            I32
         Mutable:         false
         InitExpr:
           Opcode:          I32_CONST
-          Value:           66560
+          Value:           1040
       - Index:           5
         Type:            I32
         Mutable:         false
         InitExpr:
           Opcode:          I32_CONST
-          Value:           1024
+          Value:           66576
       - Index:           6
         Type:            I32
         Mutable:         false
         InitExpr:
           Opcode:          I32_CONST
-          Value:           66560
+          Value:           1024
       - Index:           7
         Type:            I32
         Mutable:         false
         InitExpr:
           Opcode:          I32_CONST
-          Value:           131072
+          Value:           66576
       - Index:           8
         Type:            I32
         Mutable:         false
         InitExpr:
           Opcode:          I32_CONST
-          Value:           0
+          Value:           131072
       - Index:           9
         Type:            I32
         Mutable:         false
         InitExpr:
           Opcode:          I32_CONST
-          Value:           1
+          Value:           0
       - Index:           10
+        Type:            I32
+        Mutable:         false
+        InitExpr:
+          Opcode:          I32_CONST
+          Value:           1
+      - Index:           11
         Type:            I32
         Mutable:         false
         InitExpr:
@@ -115,6 +133,9 @@ Sections:
       - Name:            main
         Kind:            FUNCTION
         Index:           3
+      - Name:            str
+        Kind:            GLOBAL
+        Index:           1
       - Name:            __main_void
         Kind:            FUNCTION
         Index:           2
@@ -123,34 +144,34 @@ Sections:
         Index:           0
       - Name:            __dso_handle
         Kind:            GLOBAL
-        Index:           1
+        Index:           2
       - Name:            __data_end
         Kind:            GLOBAL
-        Index:           2
+        Index:           3
       - Name:            __stack_low
         Kind:            GLOBAL
-        Index:           3
+        Index:           4
       - Name:            __stack_high
         Kind:            GLOBAL
-        Index:           4
+        Index:           5
       - Name:            __global_base
         Kind:            GLOBAL
-        Index:           5
+        Index:           6
       - Name:            __heap_base
         Kind:            GLOBAL
-        Index:           6
+        Index:           7
       - Name:            __heap_end
         Kind:            GLOBAL
-        Index:           7
+        Index:           8
       - Name:            __memory_base
         Kind:            GLOBAL
-        Index:           8
+        Index:           9
       - Name:            __table_base
         Kind:            GLOBAL
-        Index:           9
+        Index:           10
       - Name:            __wasm_first_page_end
         Kind:            GLOBAL
-        Index:           10
+        Index:           11
   - Type:            CODE
     Functions:
       - Index:           0
@@ -169,6 +190,35 @@ Sections:
       - Index:           3
         Locals:          []
         Body:            1082808080000F0B
+  - Type:            DATA
+    Segments:
+      - SectionOffset:   7
+        InitFlags:       0
+        Offset:
+          Opcode:          I32_CONST
+          Value:           1024
+        Content:         '646174612073747200'
+      - SectionOffset:   22
+        InitFlags:       0
+        Offset:
+          Opcode:          I32_CONST
+          Value:           1036
+        Content:         '00040000'
+  - Type:            CUSTOM
+    Name:            .debug_abbrev
+    Payload:         
011101250E1305030E10171B0E11015517000002340049133A0B3B0B02180000030101491300000421004913370B0000052400030E3E0B0B0B0000062400030E0B0B3E0B0000073400030E49133F193A0B3B0B02180000080F0049130000092E01110112064018030E3A0B3B0B271949133F1900000A05000218030E3A0B3B0B491300000B2E01110112064018030E3A0B3B0B49133F1900000C34000218030E3A0B3B0B4913000000
+  - Type:            CUSTOM
+    Name:            .debug_info
+    Payload:         
D100000004000000000004017F0000001D005E0000000000000016000000000000000000000002330000000101050300040000033F0000000446000000090005080000000601066B000000080707040000005E000000010105030C040000083F00000009050000002900000004ED00029F5A0000000103CD0000000A02910C690000000103CD0000000A029108670000000103CD000000000B2F0000004C00000004ED00009F0D0000000107CD0000000C029108140000000108CD0000000C029104120000000109CD000000000500000000050400
+  - Type:            CUSTOM
+    Name:            .debug_ranges
+    Payload:         050000002E0000002F0000007B0000000000000000000000
+  - Type:            CUSTOM
+    Name:            .debug_str
+    Payload:         
696E74007374720063686172006D61696E006A0069002F55736572732F6A6F6E61732F7761736D2D6D6963726F2D72756E74696D652F70726F647563742D6D696E692F706C6174666F726D732F64617277696E2F6275696C64006164640073696D706C652E6300620061005F5F41525241595F53495A455F545950455F5F00636C616E672076657273696F6E2032322E302E306769742028676974406769746875622E636F6D3A6C6C766D2F6C6C766D2D70726F6A6563742E67697420363363633265333930646235376362633430306235313937373162373030356561623166633736612900
+  - Type:            CUSTOM
+    Name:            .debug_line
+    Payload:         
62000000040020000000010101FB0E0D0001010101000000010000010073696D706C652E6300000000000005020500000014050A0A08AD050E0658050C5805032002020001010005022F0000001805070A08BB75050E7505110658050A58050382020F000101
   - Type:            CUSTOM
     Name:            name
     FunctionNames:
@@ -183,8 +233,17 @@ Sections:
     GlobalNames:
       - Index:           0
         Name:            __stack_pointer
+    DataSegmentNames:
+      - Index:           0
+        Name:            .rodata
+      - Index:           1
+        Name:            .data
   - Type:            CUSTOM
+    HeaderSecSizeEncodingLen: 2
     Name:            producers
+    Languages:
+      - Name:            C11
+        Version:         ''
     Tools:
       - Name:            clang
         Version:         '22.0.0git'
diff --git a/lldb/test/Shell/Symtab/symtab-wasm.test b/lldb/test/Shell/Symtab/symtab-wasm.test
index fc185cd81a0ec..5374b0c2f2892 100644
--- a/lldb/test/Shell/Symtab/symtab-wasm.test
+++ b/lldb/test/Shell/Symtab/symtab-wasm.test
@@ -1,7 +1,9 @@
 # RUN: yaml2obj %S/Inputs/simple.wasm.yaml -o %t.wasm
 # RUN: %lldb %t.wasm -o 'image dump symtab'
 
-# CHECK: Code 0x0000000000000002 {{.*}} __wasm_call_ctors
-# CHECK: Code 0x0000000000000005 {{.*}} add
-# CHECK: Code 0x000000000000002f {{.*}} __original_main
-# CHECK: Code 0x000000000000007c {{.*}} main
+# CHECK: Code 0x0000000000000002 0x0000000000000002 {{.*}} __wasm_call_ctors
+# CHECK: Code 0x0000000000000005 0x0000000000000029 {{.*}} add
+# CHECK: Code 0x000000000000002f 0x000000000000004c {{.*}} __original_main
+# CHECK: Code 0x000000000000007c 0x0000000000000009 {{.*}} main
+# CHECK: Data 0x000000000000022f 0x0000000000000041 {{.*}} .rodata
+# CHECK: Data 0x0000000000000270 0x0000000000000000 {{.*}} .data

``````````

</details>


https://github.com/llvm/llvm-project/pull/153494
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to