https://github.com/eronnen updated 
https://github.com/llvm/llvm-project/pull/140486

>From 3cfe849fee4f5d489a3205ae0d2a8cd0f26a1b76 Mon Sep 17 00:00:00 2001
From: Ely Ronnen <elyron...@gmail.com>
Date: Tue, 20 May 2025 00:47:48 +0200
Subject: [PATCH 1/3] [lldb-dap] Fix disassemble request instruction offset
 handling

---
 .../test/tools/lldb-dap/dap_server.py         |   2 +-
 .../Handler/DisassembleRequestHandler.cpp     | 243 ++++++++++++------
 lldb/tools/lldb-dap/Handler/RequestHandler.h  |   9 +
 3 files changed, 173 insertions(+), 81 deletions(-)

diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py 
b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
index 70fd0b0c419db..eee4e29b92a2a 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
@@ -735,7 +735,7 @@ def request_disconnect(self, terminateDebuggee=None):
         return self.send_recv(command_dict)
 
     def request_disassemble(
-        self, memoryReference, offset=-50, instructionCount=200, 
resolveSymbols=True
+        self, memoryReference, offset=0, instructionCount=200, 
resolveSymbols=True
     ):
         args_dict = {
             "memoryReference": memoryReference,
diff --git a/lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp 
b/lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp
index d779de1e8e834..38bd254057c9e 100644
--- a/lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp
+++ b/lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp
@@ -26,8 +26,6 @@ namespace lldb_dap {
 /// `supportsDisassembleRequest` is true.
 llvm::Expected<DisassembleResponseBody>
 DisassembleRequestHandler::Run(const DisassembleArguments &args) const {
-  std::vector<DisassembledInstruction> instructions;
-
   std::optional<lldb::addr_t> addr_opt =
       DecodeMemoryReference(args.memoryReference);
   if (!addr_opt.has_value())
@@ -35,7 +33,7 @@ DisassembleRequestHandler::Run(const DisassembleArguments 
&args) const {
                                       args.memoryReference);
 
   lldb::addr_t addr_ptr = *addr_opt;
-  addr_ptr += args.instructionOffset.value_or(0);
+  addr_ptr += args.offset.value_or(0);
   lldb::SBAddress addr(addr_ptr, dap.target);
   if (!addr.IsValid())
     return llvm::make_error<DAPError>(
@@ -56,100 +54,185 @@ DisassembleRequestHandler::Run(const DisassembleArguments 
&args) const {
     }
   }
 
+  int64_t instructionOffset = args.instructionOffset.value_or(0);
+  if (instructionOffset > 0) {
+    lldb::SBInstructionList forward_insts = dap.target.ReadInstructions(
+        addr, instructionOffset + 1, flavor_string.c_str());
+    if (forward_insts.GetSize() != static_cast<size_t>(instructionOffset + 1)) 
{
+      return llvm::make_error<DAPError>(
+          "Failed to disassemble instructions after " +
+          std::to_string(instructionOffset) +
+          " instructions from the given address.");
+    }
+
+    addr = forward_insts.GetInstructionAtIndex(instructionOffset).GetAddress();
+  }
+
+  const bool resolve_symbols = args.resolveSymbols.value_or(false);
+  std::vector<DisassembledInstruction> instructions;
+  if (instructionOffset < 0)
+    instructions = disassembleBackwards(addr, std::abs(instructionOffset),
+                                        flavor_string.c_str(), 
resolve_symbols);
+
+  const auto instructions_left = args.instructionCount - instructions.size();
   lldb::SBInstructionList insts = dap.target.ReadInstructions(
-      addr, args.instructionCount, flavor_string.c_str());
+      addr, instructions_left, flavor_string.c_str());
 
   if (!insts.IsValid())
     return llvm::make_error<DAPError>(
         "Failed to find instructions for memory address.");
 
-  const bool resolve_symbols = args.resolveSymbols.value_or(false);
+  // add the disassembly from the given address forward
   const auto num_insts = insts.GetSize();
-  for (size_t i = 0; i < num_insts; ++i) {
+  for (size_t i = 0;
+       i < num_insts && instructions.size() < args.instructionCount; ++i) {
     lldb::SBInstruction inst = insts.GetInstructionAtIndex(i);
-    auto addr = inst.GetAddress();
-    const auto inst_addr = addr.GetLoadAddress(dap.target);
-    const char *m = inst.GetMnemonic(dap.target);
-    const char *o = inst.GetOperands(dap.target);
-    const char *c = inst.GetComment(dap.target);
-    auto d = inst.GetData(dap.target);
-
-    std::string bytes;
-    llvm::raw_string_ostream sb(bytes);
-    for (unsigned i = 0; i < inst.GetByteSize(); i++) {
-      lldb::SBError error;
-      uint8_t b = d.GetUnsignedInt8(error, i);
-      if (error.Success()) {
-        sb << llvm::format("%2.2x ", b);
+    instructions.push_back(
+        SBInstructionToDisassembledInstruction(inst, resolve_symbols));
+  }
+
+  // Pad the instructions with invalid instructions if needed.
+  if (instructions.size() < args.instructionCount)
+    for (size_t i = instructions.size(); i < args.instructionCount; ++i)
+      instructions.push_back(GetInvalidInstruction());
+
+  return DisassembleResponseBody{std::move(instructions)};
+}
+
+std::vector<protocol::DisassembledInstruction>
+DisassembleRequestHandler::disassembleBackwards(
+    lldb::SBAddress &addr, const uint32_t instruction_count,
+    const char *flavor_string, bool resolve_symbols) const {
+  std::vector<DisassembledInstruction> instructions;
+
+  // TODO: Simply disassemble from `addr` - `instruction_count` *
+  // `instruction_size` in architectures with a fixed instruction size.
+
+  // need to disassemble backwards, let's try from the start of the symbol if
+  // available.
+  auto symbol = addr.GetSymbol();
+  if (symbol.IsValid()) {
+    // add valid instructions before the current instruction using the symbol.
+    lldb::SBInstructionList symbol_insts = dap.target.ReadInstructions(
+        symbol.GetStartAddress(), addr, flavor_string);
+    if (symbol_insts.IsValid()) {
+      size_t backwards_insts_start =
+          symbol_insts.GetSize() >= instruction_count
+              ? symbol_insts.GetSize() - instruction_count
+              : 0;
+      for (size_t i = backwards_insts_start;
+           i < symbol_insts.GetSize() &&
+           instructions.size() < instruction_count;
+           ++i) {
+        lldb::SBInstruction inst = symbol_insts.GetInstructionAtIndex(i);
+        instructions.push_back(
+            SBInstructionToDisassembledInstruction(inst, resolve_symbols));
       }
     }
+  }
 
-    DisassembledInstruction disassembled_inst;
-    disassembled_inst.address = inst_addr;
-    disassembled_inst.instructionBytes =
-        bytes.size() > 0 ? bytes.substr(0, bytes.size() - 1) : "";
-
-    std::string instruction;
-    llvm::raw_string_ostream si(instruction);
-
-    lldb::SBSymbol symbol = addr.GetSymbol();
-    // Only add the symbol on the first line of the function.
-    if (symbol.IsValid() && symbol.GetStartAddress() == addr) {
-      // If we have a valid symbol, append it as a label prefix for the first
-      // instruction. This is so you can see the start of a function/callsite
-      // in the assembly, at the moment VS Code (1.80) does not visualize the
-      // symbol associated with the assembly instruction.
-      si << (symbol.GetMangledName() != nullptr ? symbol.GetMangledName()
-                                                : symbol.GetName())
-         << ": ";
-
-      if (resolve_symbols)
-        disassembled_inst.symbol = symbol.GetDisplayName();
-    }
+  // pad the instructions with invalid instructions if needed.
+  while (instructions.size() < instruction_count) {
+    instructions.insert(instructions.begin(), GetInvalidInstruction());
+  }
 
-    si << llvm::formatv("{0,7} {1,12}", m, o);
-    if (c && c[0]) {
-      si << " ; " << c;
-    }
+  return instructions;
+}
+
+DisassembledInstruction
+DisassembleRequestHandler::SBInstructionToDisassembledInstruction(
+    lldb::SBInstruction &inst, bool resolve_symbols) const {
+  if (!inst.IsValid())
+    return GetInvalidInstruction();
+
+  auto addr = inst.GetAddress();
+  const auto inst_addr = addr.GetLoadAddress(dap.target);
+  const char *m = inst.GetMnemonic(dap.target);
+  const char *o = inst.GetOperands(dap.target);
+  const char *c = inst.GetComment(dap.target);
+  auto d = inst.GetData(dap.target);
+
+  std::string bytes;
+  llvm::raw_string_ostream sb(bytes);
+  for (unsigned i = 0; i < inst.GetByteSize(); i++) {
+    lldb::SBError error;
+    uint8_t b = d.GetUnsignedInt8(error, i);
+    if (error.Success())
+      sb << llvm::format("%2.2x ", b);
+  }
 
-    disassembled_inst.instruction = instruction;
-
-    auto line_entry = addr.GetLineEntry();
-    // If the line number is 0 then the entry represents a compiler generated
-    // location.
-    if (line_entry.GetStartAddress() == addr && line_entry.IsValid() &&
-        line_entry.GetFileSpec().IsValid() && line_entry.GetLine() != 0) {
-      auto source = CreateSource(line_entry);
-      disassembled_inst.location = std::move(source);
-
-      const auto line = line_entry.GetLine();
-      if (line != 0 && line != LLDB_INVALID_LINE_NUMBER)
-        disassembled_inst.line = line;
-
-      const auto column = line_entry.GetColumn();
-      if (column != 0 && column != LLDB_INVALID_COLUMN_NUMBER)
-        disassembled_inst.column = column;
-
-      auto end_line_entry = line_entry.GetEndAddress().GetLineEntry();
-      if (end_line_entry.IsValid() &&
-          end_line_entry.GetFileSpec() == line_entry.GetFileSpec()) {
-        const auto end_line = end_line_entry.GetLine();
-        if (end_line != 0 && end_line != LLDB_INVALID_LINE_NUMBER &&
-            end_line != line) {
-          disassembled_inst.endLine = end_line;
-
-          const auto end_column = end_line_entry.GetColumn();
-          if (end_column != 0 && end_column != LLDB_INVALID_COLUMN_NUMBER &&
-              end_column != column)
-            disassembled_inst.endColumn = end_column - 1;
-        }
+  DisassembledInstruction disassembled_inst;
+  disassembled_inst.address = inst_addr;
+  disassembled_inst.instructionBytes =
+      bytes.size() > 0 ? bytes.substr(0, bytes.size() - 1) : "";
+
+  std::string instruction;
+  llvm::raw_string_ostream si(instruction);
+
+  lldb::SBSymbol symbol = addr.GetSymbol();
+  // Only add the symbol on the first line of the function.
+  if (symbol.IsValid() && symbol.GetStartAddress() == addr) {
+    // If we have a valid symbol, append it as a label prefix for the first
+    // instruction. This is so you can see the start of a function/callsite
+    // in the assembly, at the moment VS Code (1.80) does not visualize the
+    // symbol associated with the assembly instruction.
+    si << (symbol.GetMangledName() != nullptr ? symbol.GetMangledName()
+                                              : symbol.GetName())
+       << ": ";
+
+    if (resolve_symbols)
+      disassembled_inst.symbol = symbol.GetDisplayName();
+  }
+
+  si << llvm::formatv("{0,7} {1,12}", m, o);
+  if (c && c[0]) {
+    si << " ; " << c;
+  }
+
+  disassembled_inst.instruction = std::move(instruction);
+
+  auto line_entry = addr.GetLineEntry();
+  // If the line number is 0 then the entry represents a compiler generated
+  // location.
+
+  if (line_entry.GetStartAddress() == addr && line_entry.IsValid() &&
+      line_entry.GetFileSpec().IsValid() && line_entry.GetLine() != 0) {
+    auto source = CreateSource(line_entry);
+    disassembled_inst.location = std::move(source);
+
+    const auto line = line_entry.GetLine();
+    if (line != 0 && line != LLDB_INVALID_LINE_NUMBER)
+      disassembled_inst.line = line;
+
+    const auto column = line_entry.GetColumn();
+    if (column != 0 && column != LLDB_INVALID_COLUMN_NUMBER)
+      disassembled_inst.column = column;
+
+    auto end_line_entry = line_entry.GetEndAddress().GetLineEntry();
+    if (end_line_entry.IsValid() &&
+        end_line_entry.GetFileSpec() == line_entry.GetFileSpec()) {
+      const auto end_line = end_line_entry.GetLine();
+      if (end_line != 0 && end_line != LLDB_INVALID_LINE_NUMBER &&
+          end_line != line) {
+        disassembled_inst.endLine = end_line;
+
+        const auto end_column = end_line_entry.GetColumn();
+        if (end_column != 0 && end_column != LLDB_INVALID_COLUMN_NUMBER &&
+            end_column != column)
+          disassembled_inst.endColumn = end_column - 1;
       }
     }
-
-    instructions.push_back(std::move(disassembled_inst));
   }
 
-  return DisassembleResponseBody{std::move(instructions)};
+  return disassembled_inst;
+}
+
+DisassembledInstruction
+DisassembleRequestHandler::GetInvalidInstruction() const {
+  DisassembledInstruction invalid_inst;
+  invalid_inst.presentationHint =
+      DisassembledInstruction::eDisassembledInstructionPresentationHintInvalid;
+  return invalid_inst;
 }
 
 } // namespace lldb_dap
diff --git a/lldb/tools/lldb-dap/Handler/RequestHandler.h 
b/lldb/tools/lldb-dap/Handler/RequestHandler.h
index 998b98137a1ea..15343402558bf 100644
--- a/lldb/tools/lldb-dap/Handler/RequestHandler.h
+++ b/lldb/tools/lldb-dap/Handler/RequestHandler.h
@@ -15,6 +15,7 @@
 #include "Protocol/ProtocolBase.h"
 #include "Protocol/ProtocolRequests.h"
 #include "Protocol/ProtocolTypes.h"
+#include "lldb/API/SBAddress.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Error.h"
@@ -545,6 +546,14 @@ class DisassembleRequestHandler final
   }
   llvm::Expected<protocol::DisassembleResponseBody>
   Run(const protocol::DisassembleArguments &args) const override;
+
+  std::vector<protocol::DisassembledInstruction>
+  disassembleBackwards(lldb::SBAddress &addr, const uint32_t instruction_count,
+                       const char *flavor_string, bool resolve_symbols) const;
+  protocol::DisassembledInstruction
+  SBInstructionToDisassembledInstruction(lldb::SBInstruction &inst,
+                                         bool resolve_symbols) const;
+  protocol::DisassembledInstruction GetInvalidInstruction() const;
 };
 
 class ReadMemoryRequestHandler : public LegacyRequestHandler {

>From b9465b51ac74562241414a25a842628ed3c1806b Mon Sep 17 00:00:00 2001
From: Ely Ronnen <elyron...@gmail.com>
Date: Tue, 20 May 2025 01:47:25 +0200
Subject: [PATCH 2/3] optimize for architectures with a fixed opcode size, add
 backwards disassembly test

---
 lldb/include/lldb/API/SBTarget.h              |  4 ++
 .../test/tools/lldb-dap/dap_server.py         | 13 ++--
 .../test/tools/lldb-dap/lldbdap_testcase.py   | 10 +++-
 lldb/source/API/SBTarget.cpp                  | 20 +++++++
 .../disassemble/TestDAP_disassemble.py        | 45 +++++++++++---
 .../API/tools/lldb-dap/disassemble/main.c     |  2 +-
 .../TestDAP_instruction_breakpoint.py         |  5 +-
 .../Handler/DisassembleRequestHandler.cpp     | 59 ++++++++++++-------
 8 files changed, 116 insertions(+), 42 deletions(-)

diff --git a/lldb/include/lldb/API/SBTarget.h b/lldb/include/lldb/API/SBTarget.h
index 17735fdca6559..30dc337f02a2e 100644
--- a/lldb/include/lldb/API/SBTarget.h
+++ b/lldb/include/lldb/API/SBTarget.h
@@ -349,6 +349,10 @@ class LLDB_API SBTarget {
 
   SBError SetLabel(const char *label);
 
+  uint32_t GetMinimumOpcodeByteSize() const;
+
+  uint32_t GetMaximumOpcodeByteSize() const;
+
   /// Architecture data byte width accessor
   ///
   /// \return
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py 
b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
index eee4e29b92a2a..9937618a2cf54 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
@@ -136,7 +136,6 @@ def __init__(
         self.initialized = False
         self.frame_scopes = {}
         self.init_commands = init_commands
-        self.disassembled_instructions = {}
 
     @classmethod
     def encode_content(cls, s: str) -> bytes:
@@ -735,11 +734,15 @@ def request_disconnect(self, terminateDebuggee=None):
         return self.send_recv(command_dict)
 
     def request_disassemble(
-        self, memoryReference, offset=0, instructionCount=200, 
resolveSymbols=True
+        self,
+        memoryReference,
+        instructionOffset=-50,
+        instructionCount=200,
+        resolveSymbols=True,
     ):
         args_dict = {
             "memoryReference": memoryReference,
-            "offset": offset,
+            "instructionOffset": instructionOffset,
             "instructionCount": instructionCount,
             "resolveSymbols": resolveSymbols,
         }
@@ -748,9 +751,7 @@ def request_disassemble(
             "type": "request",
             "arguments": args_dict,
         }
-        instructions = self.send_recv(command_dict)["body"]["instructions"]
-        for inst in instructions:
-            self.disassembled_instructions[inst["address"]] = inst
+        return self.send_recv(command_dict)["body"]["instructions"]
 
     def request_readMemory(self, memoryReference, offset, count):
         args_dict = {
diff --git 
a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py 
b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
index afdc746ed0d0d..4028ae4a2525f 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
@@ -346,10 +346,14 @@ def disassemble(self, threadId=None, frameIndex=None):
         memoryReference = stackFrames[0]["instructionPointerReference"]
         self.assertIsNotNone(memoryReference)
 
-        if memoryReference not in self.dap_server.disassembled_instructions:
-            
self.dap_server.request_disassemble(memoryReference=memoryReference)
+        instructions = self.dap_server.request_disassemble(
+            memoryReference=memoryReference
+        )
+        disassembled_instructions = {}
+        for inst in instructions:
+            disassembled_instructions[inst["address"]] = inst
 
-        return self.dap_server.disassembled_instructions[memoryReference]
+        return disassembled_instructions, 
disassembled_instructions[memoryReference]
 
     def attach(
         self,
diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp
index b42ada42b0931..1de8579500776 100644
--- a/lldb/source/API/SBTarget.cpp
+++ b/lldb/source/API/SBTarget.cpp
@@ -1668,6 +1668,26 @@ SBError SBTarget::SetLabel(const char *label) {
   return Status::FromError(target_sp->SetLabel(label));
 }
 
+uint32_t SBTarget::GetMinimumOpcodeByteSize() const {
+  LLDB_INSTRUMENT_VA(this);
+
+  TargetSP target_sp(GetSP());
+  if (target_sp) {
+    return target_sp->GetArchitecture().GetMinimumOpcodeByteSize();
+  }
+  return 0;
+}
+
+uint32_t SBTarget::GetMaximumOpcodeByteSize() const {
+  LLDB_INSTRUMENT_VA(this);
+
+  TargetSP target_sp(GetSP());
+  if (target_sp) {
+    return target_sp->GetArchitecture().GetMaximumOpcodeByteSize();
+  }
+  return 0;
+}
+
 uint32_t SBTarget::GetDataByteSize() {
   LLDB_INSTRUMENT_VA(this);
 
diff --git a/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py 
b/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py
index 9e8ef5b289f2e..a09d5f111bf23 100644
--- a/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py
+++ b/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py
@@ -20,21 +20,48 @@ def test_disassemble(self):
         program = self.getBuildArtifact("a.out")
         self.build_and_launch(program)
         source = "main.c"
-        self.source_path = os.path.join(os.getcwd(), source)
-        self.set_source_breakpoints(
-            source,
-            [
-                line_number(source, "// breakpoint 1"),
-            ],
-        )
+        self.set_source_breakpoints(source, [line_number(source, "// 
breakpoint 1")])
         self.continue_to_next_stop()
 
-        pc_assembly = self.disassemble(frameIndex=0)
+        _, pc_assembly = self.disassemble(frameIndex=0)
         self.assertIn("location", pc_assembly, "Source location missing.")
         self.assertIn("instruction", pc_assembly, "Assembly instruction 
missing.")
 
         # The calling frame (qsort) is coming from a system library, as a 
result
         # we should not have a source location.
-        qsort_assembly = self.disassemble(frameIndex=1)
+        _, qsort_assembly = self.disassemble(frameIndex=1)
         self.assertNotIn("location", qsort_assembly, "Source location not 
expected.")
         self.assertIn("instruction", pc_assembly, "Assembly instruction 
missing.")
+
+    def test_disassemble_backwards(self):
+        """
+        Tests the 'disassemble' request with a backwards disassembly range.
+        """
+        program = self.getBuildArtifact("a.out")
+        self.build_and_launch(program)
+        source = "main.c"
+        self.set_source_breakpoints(source, [line_number(source, "// 
breakpoint 1")])
+        self.continue_to_next_stop()
+
+        instruction_pointer_reference = self.get_stackFrames()[1][
+            "instructionPointerReference"
+        ]
+        backwards_instructions = 50
+        instructions = self.dap_server.request_disassemble(
+            memoryReference=instruction_pointer_reference,
+            instructionOffset=-backwards_instructions,
+        )
+
+        frame_instruction_index = next(
+            (
+                i
+                for i, instruction in enumerate(instructions)
+                if instruction["address"] == instruction_pointer_reference
+            ),
+            -1,
+        )
+        self.assertEqual(
+            frame_instruction_index,
+            backwards_instructions,
+            f"requested instruction should be preceeded by 
{backwards_instructions} instructions. Actual index: {frame_instruction_index}",
+        )
diff --git a/lldb/test/API/tools/lldb-dap/disassemble/main.c 
b/lldb/test/API/tools/lldb-dap/disassemble/main.c
index 6609a4c37a70f..9da119ef70262 100644
--- a/lldb/test/API/tools/lldb-dap/disassemble/main.c
+++ b/lldb/test/API/tools/lldb-dap/disassemble/main.c
@@ -27,4 +27,4 @@ int main(void) {
 
   printf("\n");
   return 0;
-}
\ No newline at end of file
+}
diff --git 
a/lldb/test/API/tools/lldb-dap/instruction-breakpoint/TestDAP_instruction_breakpoint.py
 
b/lldb/test/API/tools/lldb-dap/instruction-breakpoint/TestDAP_instruction_breakpoint.py
index 53b7df9e54af2..07d2059b2749b 100644
--- 
a/lldb/test/API/tools/lldb-dap/instruction-breakpoint/TestDAP_instruction_breakpoint.py
+++ 
b/lldb/test/API/tools/lldb-dap/instruction-breakpoint/TestDAP_instruction_breakpoint.py
@@ -66,7 +66,7 @@ def instruction_breakpoint_test(self):
         )
 
         # Check disassembly view
-        instruction = self.disassemble(frameIndex=0)
+        disassembled_instructions, instruction = self.disassemble(frameIndex=0)
         self.assertEqual(
             instruction["address"],
             intstructionPointerReference[0],
@@ -74,8 +74,7 @@ def instruction_breakpoint_test(self):
         )
 
         # Get next instruction address to set instruction breakpoint
-        disassembled_instruction_list = 
self.dap_server.disassembled_instructions
-        instruction_addr_list = list(disassembled_instruction_list.keys())
+        instruction_addr_list = list(disassembled_instructions.keys())
         index = instruction_addr_list.index(intstructionPointerReference[0])
         if len(instruction_addr_list) >= (index + 1):
             next_inst_addr = instruction_addr_list[index + 1]
diff --git a/lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp 
b/lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp
index 38bd254057c9e..2a0ae8d8e9e12 100644
--- a/lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp
+++ b/lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp
@@ -105,29 +105,48 @@ DisassembleRequestHandler::disassembleBackwards(
     const char *flavor_string, bool resolve_symbols) const {
   std::vector<DisassembledInstruction> instructions;
 
-  // TODO: Simply disassemble from `addr` - `instruction_count` *
-  // `instruction_size` in architectures with a fixed instruction size.
-
-  // need to disassemble backwards, let's try from the start of the symbol if
-  // available.
-  auto symbol = addr.GetSymbol();
-  if (symbol.IsValid()) {
-    // add valid instructions before the current instruction using the symbol.
-    lldb::SBInstructionList symbol_insts = dap.target.ReadInstructions(
-        symbol.GetStartAddress(), addr, flavor_string);
-    if (symbol_insts.IsValid()) {
-      size_t backwards_insts_start =
-          symbol_insts.GetSize() >= instruction_count
-              ? symbol_insts.GetSize() - instruction_count
-              : 0;
-      for (size_t i = backwards_insts_start;
-           i < symbol_insts.GetSize() &&
-           instructions.size() < instruction_count;
-           ++i) {
-        lldb::SBInstruction inst = symbol_insts.GetInstructionAtIndex(i);
+  if (dap.target.GetMinimumOpcodeByteSize() ==
+      dap.target.GetMaximumOpcodeByteSize()) {
+    // If the target has a fixed opcode size, we can disassemble backwards
+    // directly.
+    lldb::addr_t disassemble_start_load_addr =
+        addr.GetLoadAddress(dap.target) -
+        (instruction_count * dap.target.GetMinimumOpcodeByteSize());
+    lldb::SBAddress disassemble_start_addr(disassemble_start_load_addr,
+                                           dap.target);
+    lldb::SBInstructionList backwards_insts =
+        dap.target.ReadInstructions(addr, instruction_count, flavor_string);
+    if (backwards_insts.IsValid()) {
+      for (size_t i = 0; i < backwards_insts.GetSize(); ++i) {
+        lldb::SBInstruction inst = backwards_insts.GetInstructionAtIndex(i);
         instructions.push_back(
             SBInstructionToDisassembledInstruction(inst, resolve_symbols));
       }
+      return instructions;
+    }
+  } else {
+    // There is no opcode fixed size so we have no idea where are the valid
+    // instructions before the current address. let's try from the start of the
+    // symbol if available.
+    auto symbol = addr.GetSymbol();
+    if (symbol.IsValid()) {
+      // add valid instructions before the current instruction using the 
symbol.
+      lldb::SBInstructionList symbol_insts = dap.target.ReadInstructions(
+          symbol.GetStartAddress(), addr, flavor_string);
+      if (symbol_insts.IsValid()) {
+        size_t backwards_insts_start =
+            symbol_insts.GetSize() >= instruction_count
+                ? symbol_insts.GetSize() - instruction_count
+                : 0;
+        for (size_t i = backwards_insts_start;
+             i < symbol_insts.GetSize() &&
+             instructions.size() < instruction_count;
+             ++i) {
+          lldb::SBInstruction inst = symbol_insts.GetInstructionAtIndex(i);
+          instructions.push_back(
+              SBInstructionToDisassembledInstruction(inst, resolve_symbols));
+        }
+      }
     }
   }
 

>From e74487ff3b9c99a4c4774afff4ca36aeb6792bae Mon Sep 17 00:00:00 2001
From: Ely Ronnen <elyron...@gmail.com>
Date: Tue, 20 May 2025 23:10:28 +0200
Subject: [PATCH 3/3] simplify disassembly logics significantly

---
 lldb/source/API/SBTarget.cpp                  |   8 +-
 .../disassemble/TestDAP_disassemble.py        |  10 +-
 .../Handler/DisassembleRequestHandler.cpp     | 288 +++++++++---------
 lldb/tools/lldb-dap/Handler/RequestHandler.h  |   9 -
 4 files changed, 159 insertions(+), 156 deletions(-)

diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp
index 1de8579500776..cd8a770a0ec04 100644
--- a/lldb/source/API/SBTarget.cpp
+++ b/lldb/source/API/SBTarget.cpp
@@ -1672,9 +1672,9 @@ uint32_t SBTarget::GetMinimumOpcodeByteSize() const {
   LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
-  if (target_sp) {
+  if (target_sp)
     return target_sp->GetArchitecture().GetMinimumOpcodeByteSize();
-  }
+
   return 0;
 }
 
@@ -1682,9 +1682,9 @@ uint32_t SBTarget::GetMaximumOpcodeByteSize() const {
   LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
-  if (target_sp) {
+  if (target_sp)
     return target_sp->GetArchitecture().GetMaximumOpcodeByteSize();
-  }
+
   return 0;
 }
 
diff --git a/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py 
b/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py
index a09d5f111bf23..a045dfd32ef2d 100644
--- a/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py
+++ b/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py
@@ -46,10 +46,18 @@ def test_disassemble_backwards(self):
         instruction_pointer_reference = self.get_stackFrames()[1][
             "instructionPointerReference"
         ]
-        backwards_instructions = 50
+        backwards_instructions = 200
+        instructions_count = 400
         instructions = self.dap_server.request_disassemble(
             memoryReference=instruction_pointer_reference,
             instructionOffset=-backwards_instructions,
+            instructionCount=instructions_count,
+        )
+
+        self.assertEqual(
+            len(instructions),
+            instructions_count,
+            "Disassemble request should return the exact requested number of 
instructions.",
         )
 
         frame_instruction_index = next(
diff --git a/lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp 
b/lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp
index 2a0ae8d8e9e12..9aa6995e5d668 100644
--- a/lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp
+++ b/lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp
@@ -12,164 +12,85 @@
 #include "Protocol/ProtocolRequests.h"
 #include "Protocol/ProtocolTypes.h"
 #include "RequestHandler.h"
+#include "lldb/API/SBAddress.h"
 #include "lldb/API/SBInstruction.h"
+#include "lldb/API/SBTarget.h"
 #include "lldb/lldb-types.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
 #include <optional>
 
 using namespace lldb_dap::protocol;
 
 namespace lldb_dap {
 
-/// Disassembles code stored at the provided location.
-/// Clients should only call this request if the corresponding capability
-/// `supportsDisassembleRequest` is true.
-llvm::Expected<DisassembleResponseBody>
-DisassembleRequestHandler::Run(const DisassembleArguments &args) const {
-  std::optional<lldb::addr_t> addr_opt =
-      DecodeMemoryReference(args.memoryReference);
-  if (!addr_opt.has_value())
-    return llvm::make_error<DAPError>("Malformed memory reference: " +
-                                      args.memoryReference);
-
-  lldb::addr_t addr_ptr = *addr_opt;
-  addr_ptr += args.offset.value_or(0);
-  lldb::SBAddress addr(addr_ptr, dap.target);
-  if (!addr.IsValid())
-    return llvm::make_error<DAPError>(
-        "Memory reference not found in the current binary.");
-
-  std::string flavor_string;
-  const auto target_triple = llvm::StringRef(dap.target.GetTriple());
-  // This handles both 32 and 64bit x86 architecture. The logic is duplicated 
in
-  // `CommandObjectDisassemble::CommandOptions::OptionParsingStarting`
-  if (target_triple.starts_with("x86")) {
-    const lldb::SBStructuredData flavor =
-        dap.debugger.GetSetting("target.x86-disassembly-flavor");
-
-    const size_t str_length = flavor.GetStringValue(nullptr, 0);
-    if (str_length != 0) {
-      flavor_string.resize(str_length + 1);
-      flavor.GetStringValue(flavor_string.data(), flavor_string.length());
-    }
-  }
-
-  int64_t instructionOffset = args.instructionOffset.value_or(0);
-  if (instructionOffset > 0) {
-    lldb::SBInstructionList forward_insts = dap.target.ReadInstructions(
-        addr, instructionOffset + 1, flavor_string.c_str());
-    if (forward_insts.GetSize() != static_cast<size_t>(instructionOffset + 1)) 
{
-      return llvm::make_error<DAPError>(
-          "Failed to disassemble instructions after " +
-          std::to_string(instructionOffset) +
-          " instructions from the given address.");
-    }
-
-    addr = forward_insts.GetInstructionAtIndex(instructionOffset).GetAddress();
-  }
-
-  const bool resolve_symbols = args.resolveSymbols.value_or(false);
-  std::vector<DisassembledInstruction> instructions;
-  if (instructionOffset < 0)
-    instructions = disassembleBackwards(addr, std::abs(instructionOffset),
-                                        flavor_string.c_str(), 
resolve_symbols);
-
-  const auto instructions_left = args.instructionCount - instructions.size();
-  lldb::SBInstructionList insts = dap.target.ReadInstructions(
-      addr, instructions_left, flavor_string.c_str());
-
-  if (!insts.IsValid())
-    return llvm::make_error<DAPError>(
-        "Failed to find instructions for memory address.");
-
-  // add the disassembly from the given address forward
-  const auto num_insts = insts.GetSize();
-  for (size_t i = 0;
-       i < num_insts && instructions.size() < args.instructionCount; ++i) {
-    lldb::SBInstruction inst = insts.GetInstructionAtIndex(i);
-    instructions.push_back(
-        SBInstructionToDisassembledInstruction(inst, resolve_symbols));
-  }
-
-  // Pad the instructions with invalid instructions if needed.
-  if (instructions.size() < args.instructionCount)
-    for (size_t i = instructions.size(); i < args.instructionCount; ++i)
-      instructions.push_back(GetInvalidInstruction());
-
-  return DisassembleResponseBody{std::move(instructions)};
+static protocol::DisassembledInstruction GetInvalidInstruction() {
+  DisassembledInstruction invalid_inst;
+  invalid_inst.presentationHint =
+      DisassembledInstruction::eDisassembledInstructionPresentationHintInvalid;
+  return invalid_inst;
 }
 
-std::vector<protocol::DisassembledInstruction>
-DisassembleRequestHandler::disassembleBackwards(
-    lldb::SBAddress &addr, const uint32_t instruction_count,
-    const char *flavor_string, bool resolve_symbols) const {
-  std::vector<DisassembledInstruction> instructions;
+static lldb::SBAddress GetDisassembleStartAddress(lldb::SBTarget target,
+                                                  lldb::SBAddress addr,
+                                                  int64_t instruction_offset) {
+  if (instruction_offset == 0)
+    return addr;
+
+  if (target.GetMinimumOpcodeByteSize() == target.GetMaximumOpcodeByteSize()) {
+    // We have fixed opcode size, so we can calculate the address directly,
+    // negative or positive.
+    lldb::addr_t load_addr = addr.GetLoadAddress(target);
+    load_addr += instruction_offset * target.GetMinimumOpcodeByteSize();
+    return lldb::SBAddress(load_addr, target);
+  }
 
-  if (dap.target.GetMinimumOpcodeByteSize() ==
-      dap.target.GetMaximumOpcodeByteSize()) {
-    // If the target has a fixed opcode size, we can disassemble backwards
-    // directly.
-    lldb::addr_t disassemble_start_load_addr =
-        addr.GetLoadAddress(dap.target) -
-        (instruction_count * dap.target.GetMinimumOpcodeByteSize());
-    lldb::SBAddress disassemble_start_addr(disassemble_start_load_addr,
-                                           dap.target);
-    lldb::SBInstructionList backwards_insts =
-        dap.target.ReadInstructions(addr, instruction_count, flavor_string);
-    if (backwards_insts.IsValid()) {
-      for (size_t i = 0; i < backwards_insts.GetSize(); ++i) {
-        lldb::SBInstruction inst = backwards_insts.GetInstructionAtIndex(i);
-        instructions.push_back(
-            SBInstructionToDisassembledInstruction(inst, resolve_symbols));
-      }
-      return instructions;
-    }
-  } else {
-    // There is no opcode fixed size so we have no idea where are the valid
-    // instructions before the current address. let's try from the start of the
-    // symbol if available.
-    auto symbol = addr.GetSymbol();
-    if (symbol.IsValid()) {
-      // add valid instructions before the current instruction using the 
symbol.
-      lldb::SBInstructionList symbol_insts = dap.target.ReadInstructions(
-          symbol.GetStartAddress(), addr, flavor_string);
-      if (symbol_insts.IsValid()) {
-        size_t backwards_insts_start =
-            symbol_insts.GetSize() >= instruction_count
-                ? symbol_insts.GetSize() - instruction_count
-                : 0;
-        for (size_t i = backwards_insts_start;
-             i < symbol_insts.GetSize() &&
-             instructions.size() < instruction_count;
-             ++i) {
-          lldb::SBInstruction inst = symbol_insts.GetInstructionAtIndex(i);
-          instructions.push_back(
-              SBInstructionToDisassembledInstruction(inst, resolve_symbols));
-        }
-      }
-    }
+  if (instruction_offset > 0) {
+    lldb::SBInstructionList forward_insts =
+        target.ReadInstructions(addr, instruction_offset + 1);
+    return forward_insts.GetInstructionAtIndex(forward_insts.GetSize() - 1)
+        .GetAddress();
   }
 
-  // pad the instructions with invalid instructions if needed.
-  while (instructions.size() < instruction_count) {
-    instructions.insert(instructions.begin(), GetInvalidInstruction());
+  // We have a negative instruction offset, so we need to disassemble 
backwards.
+  // The opcode size is not fixed, so we have no idea where to start from.
+  // Let's try from the start of the current symbol if available.
+  auto symbol = addr.GetSymbol();
+  if (!symbol.IsValid())
+    return addr;
+
+  // Add valid instructions before the current instruction using the symbol.
+  lldb::SBInstructionList symbol_insts =
+      target.ReadInstructions(symbol.GetStartAddress(), addr, nullptr);
+  if (!symbol_insts.IsValid() || symbol_insts.GetSize() == 0)
+    return addr;
+
+  const auto backwards_instructions_count =
+      static_cast<size_t>(std::abs(instruction_offset));
+  if (symbol_insts.GetSize() < backwards_instructions_count) {
+    // We don't have enough instructions to disassemble backwards, so just
+    // return the start address of the symbol.
+    return symbol_insts.GetInstructionAtIndex(0).GetAddress();
   }
 
-  return instructions;
+  return symbol_insts
+      .GetInstructionAtIndex(symbol_insts.GetSize() -
+                             backwards_instructions_count)
+      .GetAddress();
 }
 
-DisassembledInstruction
-DisassembleRequestHandler::SBInstructionToDisassembledInstruction(
-    lldb::SBInstruction &inst, bool resolve_symbols) const {
+static DisassembledInstruction ConvertSBInstructionToDisassembledInstruction(
+    lldb::SBTarget &target, lldb::SBInstruction &inst, bool resolve_symbols) {
   if (!inst.IsValid())
     return GetInvalidInstruction();
 
   auto addr = inst.GetAddress();
-  const auto inst_addr = addr.GetLoadAddress(dap.target);
-  const char *m = inst.GetMnemonic(dap.target);
-  const char *o = inst.GetOperands(dap.target);
-  const char *c = inst.GetComment(dap.target);
-  auto d = inst.GetData(dap.target);
+  const auto inst_addr = addr.GetLoadAddress(target);
+  const char *m = inst.GetMnemonic(target);
+  const char *o = inst.GetOperands(target);
+  const char *c = inst.GetComment(target);
+  auto d = inst.GetData(target);
 
   std::string bytes;
   llvm::raw_string_ostream sb(bytes);
@@ -246,12 +167,95 @@ 
DisassembleRequestHandler::SBInstructionToDisassembledInstruction(
   return disassembled_inst;
 }
 
-DisassembledInstruction
-DisassembleRequestHandler::GetInvalidInstruction() const {
-  DisassembledInstruction invalid_inst;
-  invalid_inst.presentationHint =
-      DisassembledInstruction::eDisassembledInstructionPresentationHintInvalid;
-  return invalid_inst;
+/// Disassembles code stored at the provided location.
+/// Clients should only call this request if the corresponding capability
+/// `supportsDisassembleRequest` is true.
+llvm::Expected<DisassembleResponseBody>
+DisassembleRequestHandler::Run(const DisassembleArguments &args) const {
+  std::optional<lldb::addr_t> addr_opt =
+      DecodeMemoryReference(args.memoryReference);
+  if (!addr_opt.has_value())
+    return llvm::make_error<DAPError>("Malformed memory reference: " +
+                                      args.memoryReference);
+
+  lldb::addr_t addr_ptr = *addr_opt;
+  addr_ptr += args.offset.value_or(0);
+  lldb::SBAddress addr(addr_ptr, dap.target);
+  if (!addr.IsValid())
+    return llvm::make_error<DAPError>(
+        "Memory reference not found in the current binary.");
+
+  std::string flavor_string;
+  const auto target_triple = llvm::StringRef(dap.target.GetTriple());
+  // This handles both 32 and 64bit x86 architecture. The logic is duplicated 
in
+  // `CommandObjectDisassemble::CommandOptions::OptionParsingStarting`
+  if (target_triple.starts_with("x86")) {
+    const lldb::SBStructuredData flavor =
+        dap.debugger.GetSetting("target.x86-disassembly-flavor");
+
+    const size_t str_length = flavor.GetStringValue(nullptr, 0);
+    if (str_length != 0) {
+      flavor_string.resize(str_length + 1);
+      flavor.GetStringValue(flavor_string.data(), flavor_string.length());
+    }
+  }
+
+  // Offset (in instructions) to be applied after the byte offset (if any)
+  // before disassembling. Can be negative.
+  int64_t instruction_offset = args.instructionOffset.value_or(0);
+
+  // Calculate a sufficient address to start disassembling from.
+  lldb::SBAddress disassemble_start_addr =
+      GetDisassembleStartAddress(dap.target, addr, instruction_offset);
+  if (!disassemble_start_addr.IsValid())
+    return llvm::make_error<DAPError>(
+        "Unexpected error while disassembling instructions.");
+
+  lldb::SBInstructionList insts = dap.target.ReadInstructions(
+      disassemble_start_addr, args.instructionCount, flavor_string.c_str());
+  if (!insts.IsValid())
+    return llvm::make_error<DAPError>(
+        "Unexpected error while disassembling instructions.");
+
+  // Conver the found instructions to the DAP format.
+  const bool resolve_symbols = args.resolveSymbols.value_or(false);
+  std::vector<DisassembledInstruction> instructions;
+  size_t original_address_index = args.instructionCount;
+  for (size_t i = 0; i < insts.GetSize(); ++i) {
+    lldb::SBInstruction inst = insts.GetInstructionAtIndex(i);
+    if (inst.GetAddress() == addr)
+      original_address_index = i;
+
+    instructions.push_back(ConvertSBInstructionToDisassembledInstruction(
+        dap.target, inst, resolve_symbols));
+  }
+
+  // Check if we miss instructions at the beginning.
+  if (instruction_offset < 0) {
+    const auto backwards_instructions_count =
+        static_cast<size_t>(std::abs(instruction_offset));
+    if (original_address_index < backwards_instructions_count) {
+      // We don't have enough instructions before the main address as was
+      // requested. Let's pad the start of the instructions with invalid
+      // instructions.
+      std::vector<DisassembledInstruction> invalid_instructions(
+          backwards_instructions_count - original_address_index,
+          GetInvalidInstruction());
+      instructions.insert(instructions.begin(), invalid_instructions.begin(),
+                          invalid_instructions.end());
+
+      // Trim excess instructions if needed.
+      if (instructions.size() > args.instructionCount)
+        instructions.resize(args.instructionCount);
+    }
+  }
+
+  // Pad the instructions with invalid instructions if needed.
+  while (instructions.size() < args.instructionCount) {
+    instructions.push_back(GetInvalidInstruction());
+  }
+
+  return DisassembleResponseBody{std::move(instructions)};
 }
 
 } // namespace lldb_dap
diff --git a/lldb/tools/lldb-dap/Handler/RequestHandler.h 
b/lldb/tools/lldb-dap/Handler/RequestHandler.h
index 15343402558bf..998b98137a1ea 100644
--- a/lldb/tools/lldb-dap/Handler/RequestHandler.h
+++ b/lldb/tools/lldb-dap/Handler/RequestHandler.h
@@ -15,7 +15,6 @@
 #include "Protocol/ProtocolBase.h"
 #include "Protocol/ProtocolRequests.h"
 #include "Protocol/ProtocolTypes.h"
-#include "lldb/API/SBAddress.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Error.h"
@@ -546,14 +545,6 @@ class DisassembleRequestHandler final
   }
   llvm::Expected<protocol::DisassembleResponseBody>
   Run(const protocol::DisassembleArguments &args) const override;
-
-  std::vector<protocol::DisassembledInstruction>
-  disassembleBackwards(lldb::SBAddress &addr, const uint32_t instruction_count,
-                       const char *flavor_string, bool resolve_symbols) const;
-  protocol::DisassembledInstruction
-  SBInstructionToDisassembledInstruction(lldb::SBInstruction &inst,
-                                         bool resolve_symbols) const;
-  protocol::DisassembledInstruction GetInvalidInstruction() const;
 };
 
 class ReadMemoryRequestHandler : public LegacyRequestHandler {

_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to