Author: n2h9 Date: 2025-12-04T13:56:26-08:00 New Revision: da4ea75336a55402797a7f10805ef5e2cd5fe9de
URL: https://github.com/llvm/llvm-project/commit/da4ea75336a55402797a7f10805ef5e2cd5fe9de DIFF: https://github.com/llvm/llvm-project/commit/da4ea75336a55402797a7f10805ef5e2cd5fe9de.diff LOG: [lldb] [disassembler] chore: enhance VariableAnnotator to return structured data: introduce VariableAnnotator::AnnotateStructured method (#169408) ## Description This is a contribution to the topic [Rich Disassembler for LLDB](https://discourse.llvm.org/t/rich-disassembler-for-lldb/76952), specifically this part: ``` The rich disassembler output should be exposed as structured data and made available through LLDB’s scripting API so more tooling could be built on top of this ``` ---- This PR introduces a new method, `AnnotateStructured`, in the `VariableAnnotator` class. It returns the result as a vector of `VariableAnnotation` structured data, in contrast to the original `Annotate`, which returns a vector of strings. Additionally, the structured data is enhanced with information inferred from `DWARFExpressionEntry` and from the variable declaration data. I have moved this part of the functionality from a larger PR, https://github.com/llvm/llvm-project/pull/165163, to make it easier to review and to deliver smaller chunks faster, in an incremental way. ## Testing Run the tests with ```sh ./build/bin/lldb-dotest -v -p TestVariableAnnotationsDisassembler.py lldb/test/API/functionalities/disassembler-variables ``` All tests (9 existing) are passing. 
<details> <summary>screenshot 2025-11-24</summary> <img width="1344" height="875" alt="screenshot" src="https://github.com/user-attachments/assets/863e0fca-1e3e-43dc-bfa3-4b78ce287ae6" /> </details> <details> <summary>screenshot 2025-11-26</summary> <img width="1851" height="865" alt="image" src="https://github.com/user-attachments/assets/d47dacee-a679-4a49-ab22-efb5a16fe29c" /> </details> <details> <summary>screenshot 2025-12-03</summary> <img width="1592" height="922" alt="Screenshot From 2025-12-03 22-11-30" src="https://github.com/user-attachments/assets/957ded3d-bea1-43d0-8241-d342dfc2c7b0" /> </details> --------- Signed-off-by: Nikita B <[email protected]> Co-authored-by: Jonas Devlieghere <[email protected]> Added: Modified: lldb/include/lldb/Core/Disassembler.h lldb/source/Core/Disassembler.cpp Removed: ################################################################################ diff --git a/lldb/include/lldb/Core/Disassembler.h b/lldb/include/lldb/Core/Disassembler.h index ab0f4ac804a7c..fb91907e5b520 100644 --- a/lldb/include/lldb/Core/Disassembler.h +++ b/lldb/include/lldb/Core/Disassembler.h @@ -574,24 +574,40 @@ class Disassembler : public std::enable_shared_from_this<Disassembler>, const Disassembler &operator=(const Disassembler &) = delete; }; +/// Structured data for a single variable annotation. +struct VariableAnnotation { + std::string variable_name; + /// Location description (e.g., "r15", "undef", "const_0"). + std::string location_description; + /// Whether variable is live at this instruction. + bool is_live; + /// Register numbering scheme for location interpretation. + lldb::RegisterKind register_kind; + /// Where this annotation is valid. + std::optional<lldb_private::AddressRange> address_range; + /// Source file where variable was declared. + std::optional<std::string> decl_file; + /// Line number where variable was declared. + std::optional<uint32_t> decl_line; + /// Variable's type name. 
+ std::optional<std::string> type_name; +}; + /// Tracks live variable annotations across instructions and produces /// per-instruction "events" like `name = RDI` or `name = <undef>`. class VariableAnnotator { - struct VarState { - /// Display name. - std::string name; - /// Last printed location (empty means <undef>). - std::string last_loc; - }; // Live state from the previous instruction, keyed by Variable::GetID(). - llvm::DenseMap<lldb::user_id_t, VarState> m_live_vars; + llvm::DenseMap<lldb::user_id_t, VariableAnnotation> m_live_vars; public: /// Compute annotation strings for a single instruction and update /// `m_live_vars`. Returns only the events that should be printed *at this /// instruction*. std::vector<std::string> Annotate(Instruction &inst); + + /// Returns structured data for all variables relevant at this instruction. + std::vector<VariableAnnotation> AnnotateStructured(Instruction &inst); }; } // namespace lldb_private diff --git a/lldb/source/Core/Disassembler.cpp b/lldb/source/Core/Disassembler.cpp index ed32caf361e0a..2d73df1e485c3 100644 --- a/lldb/source/Core/Disassembler.cpp +++ b/lldb/source/Core/Disassembler.cpp @@ -286,6 +286,18 @@ bool Disassembler::ElideMixedSourceAndDisassemblyLine( return false; } +static constexpr const llvm::StringLiteral kUndefLocation = "undef"; +static constexpr const llvm::StringLiteral kUndefLocationFormatted = "<undef>"; +static void +AddVariableAnnotationToVector(std::vector<VariableAnnotation> &annotations, + VariableAnnotation annotation_entity, + const bool is_live) { + annotation_entity.is_live = is_live; + if (!is_live) + annotation_entity.location_description = kUndefLocation; + annotations.push_back(std::move(annotation_entity)); +} + // For each instruction, this block attempts to resolve in-scope variables // and determine if the current PC falls within their // DWARF location entry. 
If so, it prints a simplified annotation using the @@ -300,16 +312,37 @@ bool Disassembler::ElideMixedSourceAndDisassemblyLine( // disassembled instruction stream, similar to how debug information // enhances source-level debugging. std::vector<std::string> VariableAnnotator::Annotate(Instruction &inst) { + std::vector<VariableAnnotation> structured_annotations = + AnnotateStructured(inst); + std::vector<std::string> events; + events.reserve(structured_annotations.size()); + + for (const VariableAnnotation &annotation : structured_annotations) { + const llvm::StringRef location = + (annotation.location_description == kUndefLocation + ? llvm::StringRef(kUndefLocationFormatted) + : llvm::StringRef(annotation.location_description)); + + events.push_back( + llvm::formatv("{0} = {1}", annotation.variable_name, location).str()); + } + + return events; +} + +std::vector<VariableAnnotation> +VariableAnnotator::AnnotateStructured(Instruction &inst) { + std::vector<VariableAnnotation> annotations; auto module_sp = inst.GetAddress().GetModule(); - // If we lost module context, everything becomes <undef>. + // If we lost module context, mark all live variables as UndefLocation. if (!module_sp) { for (const auto &KV : m_live_vars) - events.emplace_back(llvm::formatv("{0} = <undef>", KV.second.name).str()); + AddVariableAnnotationToVector(annotations, KV.second, false); m_live_vars.clear(); - return events; + return annotations; } // Resolve function/block at this *file* address. @@ -320,9 +353,9 @@ std::vector<std::string> VariableAnnotator::Annotate(Instruction &inst) { !sc.function) { // No function context: everything dies here. for (const auto &KV : m_live_vars) - events.emplace_back(llvm::formatv("{0} = <undef>", KV.second.name).str()); + AddVariableAnnotationToVector(annotations, KV.second, false); m_live_vars.clear(); - return events; + return annotations; } // Collect in-scope variables for this instruction into current_vars. 
@@ -349,7 +382,7 @@ std::vector<std::string> VariableAnnotator::Annotate(Instruction &inst) { // Prefer "register-only" output when we have an ABI. opts.PrintRegisterOnly = static_cast<bool>(abi_sp); - llvm::DenseMap<lldb::user_id_t, VarState> current_vars; + llvm::DenseMap<lldb::user_id_t, VariableAnnotation> current_vars; for (size_t i = 0, e = var_list.GetSize(); i != e; ++i) { lldb::VariableSP v = var_list.GetVariableAtIndex(i); @@ -376,8 +409,26 @@ std::vector<std::string> VariableAnnotator::Annotate(Instruction &inst) { if (loc.empty()) continue; - current_vars.try_emplace(v->GetID(), - VarState{std::string(name), std::string(loc)}); + std::optional<std::string> decl_file; + std::optional<uint32_t> decl_line; + std::optional<std::string> type_name; + + const Declaration &decl = v->GetDeclaration(); + if (decl.GetFile()) { + decl_file = decl.GetFile().GetFilename().AsCString(); + if (decl.GetLine() > 0) + decl_line = decl.GetLine(); + } + + if (Type *type = v->GetType()) + if (const char *type_str = type->GetName().AsCString()) + type_name = type_str; + + current_vars.try_emplace( + v->GetID(), + VariableAnnotation{std::string(name), std::string(loc), true, + entry.expr->GetRegisterKind(), entry.file_range, + decl_file, decl_line, type_name}); } // Diff m_live_vars → current_vars. @@ -385,26 +436,23 @@ std::vector<std::string> VariableAnnotator::Annotate(Instruction &inst) { // 1) Starts/changes: iterate current_vars and compare with m_live_vars. for (const auto &KV : current_vars) { auto it = m_live_vars.find(KV.first); - if (it == m_live_vars.end()) { + if (it == m_live_vars.end()) // Newly live. - events.emplace_back( - llvm::formatv("{0} = {1}", KV.second.name, KV.second.last_loc).str()); - } else if (it->second.last_loc != KV.second.last_loc) { + AddVariableAnnotationToVector(annotations, KV.second, true); + else if (it->second.location_description != KV.second.location_description) // Location changed. 
- events.emplace_back( - llvm::formatv("{0} = {1}", KV.second.name, KV.second.last_loc).str()); - } + AddVariableAnnotationToVector(annotations, KV.second, true); } - // 2) Ends: anything that was live but is not in current_vars becomes <undef>. - for (const auto &KV : m_live_vars) { + // 2) Ends: anything that was live but is not in current_vars becomes + // UndefLocation. + for (const auto &KV : m_live_vars) if (!current_vars.count(KV.first)) - events.emplace_back(llvm::formatv("{0} = <undef>", KV.second.name).str()); - } + AddVariableAnnotationToVector(annotations, KV.second, false); // Commit new state. m_live_vars = std::move(current_vars); - return events; + return annotations; } void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch, _______________________________________________ lldb-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits
