llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-lldb Author: None (llvmbot) <details> <summary>Changes</summary> Backport 3ca7a729901851f4a6f83e9783ee393cca46fd12 Requested by: @<!-- -->da-viper --- Patch is 21.63 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/176796.diff 9 Files Affected: - (modified) lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py (+6-1) - (modified) lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py (+37-25) - (modified) lldb/test/API/tools/lldb-dap/completions/main.cpp (+1) - (modified) lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp (+99-46) - (modified) lldb/tools/lldb-dap/LLDBUtils.cpp (+35) - (modified) lldb/tools/lldb-dap/LLDBUtils.h (+18) - (modified) lldb/tools/lldb-dap/Protocol/ProtocolRequests.h (+3-3) - (modified) lldb/unittests/DAP/LLDBUtilsTest.cpp (+28) - (modified) lldb/unittests/DAP/ProtocolTypesTest.cpp (+3-3) ``````````diff diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index 6a98bb50c9036..a79d766118b9d 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -1393,7 +1393,12 @@ def request_compileUnits(self, moduleId): return response def request_completions(self, text, frameId=None): - args_dict = {"text": text, "column": len(text) + 1} + def code_units(input: str) -> int: + utf16_bytes = input.encode("utf-16-le") + # one UTF16 codeunit = 2 bytes. + return len(utf16_bytes) // 2 + + args_dict = {"text": text, "column": code_units(text) + 1} if frameId: args_dict["frameId"] = frameId command_dict = { diff --git a/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py b/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py index 0ebecf6872a7d..1792ff9953efe 100644 --- a/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py +++ b/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py @@ -5,31 +5,32 @@ import lldbdap_testcase import dap_server from lldbsuite.test import lldbutil -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import skipIf +from lldbsuite.test.lldbtest import line_number session_completion = { "text": "session", - "label": "session -- Commands controlling LLDB session.", + "label": "session", + "detail": "Commands controlling LLDB session.", } settings_completion = { "text": "settings", - "label": "settings -- Commands for managing LLDB settings.", + "label": "settings", + "detail": "Commands for managing LLDB settings.", } memory_completion = { "text": "memory", - "label": "memory -- Commands for operating on memory in the current target process.", + "label": "memory", + "detail": "Commands for operating on memory in the current target process.", } command_var_completion = { "text": "var", - "label": "var -- Show variables for the current stack frame. Defaults to all arguments and local variables in scope. Names of argument, local, file static and file global variables can be specified.", + "label": "var", + "detail": "Show variables for the current stack frame. Defaults to all arguments and local variables in scope. Names of argument, local, file static and file global variables can be specified.", } -variable_var_completion = { - "text": "var", - "label": "var -- vector<baz> &", -} -variable_var1_completion = {"text": "var1", "label": "var1 -- int &"} -variable_var2_completion = {"text": "var2", "label": "var2 -- int &"} +variable_var_completion = {"text": "var", "label": "var", "detail": "vector<baz> &"} +variable_var1_completion = {"text": "var1", "label": "var1", "detail": "int &"} +variable_var2_completion = {"text": "var2", "label": "var2", "detail": "int &"} # Older version of libcxx produce slightly different typename strings for @@ -79,11 +80,13 @@ def test_command_completions(self): [ { "text": "read", - "label": "read -- Read from the memory of the current target process.", + "label": "read", + "detail": "Read from the memory of the current target process.", }, { "text": "region", - "label": "region -- Get information on the memory region containing an address in the current target process.", + "label": "region", + "detail": "Get information on the memory region containing an address in the current target process.", }, ], ) @@ -110,7 +113,8 @@ def test_command_completions(self): [ { "text": "set", - "label": "set -- Set the value of the specified debugger setting.", + "label": "set", + "detail": "Set the value of the specified debugger setting.", } ], ) @@ -167,7 +171,7 @@ def test_variable_completions(self): self.verify_completions( self.dap_server.get_completions("str"), [{"text": "struct", "label": "struct"}], - [{"text": "str1", "label": "str1 -- std::string &"}], + [{"text": "str1", "label": "str1", "detail": "std::string &"}], ) self.continue_to_next_stop() @@ -189,42 +193,46 @@ def test_variable_completions(self): self.dap_server.get_completions("str"), [ {"text": "struct", "label": "struct"}, - {"text": "str1", "label": "str1 -- std::string &"}, + {"text": "str1", "label": "str1", "detail": "std::string &"}, ], ) + self.assertIsNotNone(self.dap_server.get_completions("ƒ")) + # Test utf8 after ascii. + self.dap_server.get_completions("mƒ") + # Completion also works for more complex expressions self.verify_completions( self.dap_server.get_completions("foo1.v"), - [{"text": "var1", "label": "foo1.var1 -- int"}], + [{"text": "var1", "label": "foo1.var1", "detail": "int"}], ) self.verify_completions( self.dap_server.get_completions("foo1.my_bar_object.v"), - [{"text": "var1", "label": "foo1.my_bar_object.var1 -- int"}], + [{"text": "var1", "label": "foo1.my_bar_object.var1", "detail": "int"}], ) self.verify_completions( self.dap_server.get_completions("foo1.var1 + foo1.v"), - [{"text": "var1", "label": "foo1.var1 -- int"}], + [{"text": "var1", "label": "foo1.var1", "detail": "int"}], ) self.verify_completions( self.dap_server.get_completions("foo1.var1 + v"), - [{"text": "var1", "label": "var1 -- int &"}], + [{"text": "var1", "label": "var1", "detail": "int &"}], ) # should correctly handle spaces between objects and member operators self.verify_completions( self.dap_server.get_completions("foo1 .v"), - [{"text": "var1", "label": ".var1 -- int"}], - [{"text": "var2", "label": ".var2 -- int"}], + [{"text": "var1", "label": ".var1", "detail": "int"}], + [{"text": "var2", "label": ".var2", "detail": "int"}], ) self.verify_completions( self.dap_server.get_completions("foo1 . v"), - [{"text": "var1", "label": "var1 -- int"}], - [{"text": "var2", "label": "var2 -- int"}], + [{"text": "var1", "label": "var1", "detail": "int"}], + [{"text": "var2", "label": "var2", "detail": "int"}], ) # Even in variable mode, we can still use the escape prefix @@ -273,6 +281,10 @@ def test_auto_completions(self): ], ) + # TODO: Note we are not checking the result because the `expression --` command adds an extra character + # for non ascii variables. + self.assertIsNotNone(self.dap_server.get_completions("ƒ")) + self.continue_to_exit() console_str = self.get_console() # we check in console to avoid waiting for output event. diff --git a/lldb/test/API/tools/lldb-dap/completions/main.cpp b/lldb/test/API/tools/lldb-dap/completions/main.cpp index 4314067cfe951..a12ce18201d97 100644 --- a/lldb/test/API/tools/lldb-dap/completions/main.cpp +++ b/lldb/test/API/tools/lldb-dap/completions/main.cpp @@ -29,6 +29,7 @@ int main(int argc, char const *argv[]) { fun(vec); bar bar1 = {2}; bar *bar2 = &bar1; + int ƒake_f = 200; foo foo1 = {3, &bar1, bar1, NULL}; return 0; // breakpoint 2 } diff --git a/lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp b/lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp index de9a15dcb73f4..9a3973190f7e7 100644 --- a/lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp +++ b/lldb/tools/lldb-dap/Handler/CompletionsHandler.cpp @@ -7,17 +7,70 @@ //===----------------------------------------------------------------------===// #include "DAP.h" -#include "JSONUtils.h" +#include "LLDBUtils.h" #include "Protocol/ProtocolRequests.h" #include "Protocol/ProtocolTypes.h" #include "RequestHandler.h" #include "lldb/API/SBStringList.h" +#include "llvm/Support/ConvertUTF.h" using namespace llvm; using namespace lldb_dap; +using namespace lldb; using namespace lldb_dap::protocol; namespace lldb_dap { +/// Gets the position in the UTF8 string where the specified line started. +static size_t GetLineStartPos(StringRef text, uint32_t line) { + if (line == 0) // Invalid line. + return StringRef::npos; + + if (line == 1) + return 0; + + uint32_t cur_line = 1; + size_t pos = 0; + + while (cur_line < line) { + const size_t new_line_pos = text.find('\n', pos); + + if (new_line_pos == StringRef::npos) + return new_line_pos; + + pos = new_line_pos + 1; + // text may end with a new line + if (pos >= text.size()) + return StringRef::npos; + + cur_line++; + } + + assert(pos < text.size()); + return pos; +} + +static std::optional<size_t> GetCursorPos(StringRef text, uint32_t line, + uint32_t utf16_codeunits) { + if (text.empty()) + return std::nullopt; + + const size_t line_start_pos = GetLineStartPos(text, line); + if (line_start_pos == StringRef::npos) + return std::nullopt; + + const StringRef completion_line = + text.substr(line_start_pos, text.find('\n', line_start_pos)); + if (completion_line.empty()) + return std::nullopt; + + const std::optional<size_t> cursor_pos_opt = + UTF16CodeunitToBytes(completion_line, utf16_codeunits); + if (!cursor_pos_opt) + return std::nullopt; + + const size_t cursor_pos = line_start_pos + *cursor_pos_opt; + return cursor_pos; +} /// Returns a list of possible completions for a given caret position and text. /// @@ -25,85 +78,85 @@ namespace lldb_dap { /// `supportsCompletionsRequest` is true. Expected<CompletionsResponseBody> CompletionsRequestHandler::Run(const CompletionsArguments &args) const { + std::string text = args.text; + const uint32_t line = args.line; + // column starts at 1. + const uint32_t utf16_codeunits = args.column - 1; + + const auto cursor_pos_opt = GetCursorPos(text, line, utf16_codeunits); + if (!cursor_pos_opt) + return CompletionsResponseBody{}; + + size_t cursor_pos = *cursor_pos_opt; + // If we have a frame, try to set the context for variable completions. lldb::SBFrame frame = dap.GetLLDBFrame(args.frameId); if (frame.IsValid()) { - frame.GetThread().GetProcess().SetSelectedThread(frame.GetThread()); - frame.GetThread().SetSelectedFrame(frame.GetFrameID()); - } - - std::string text = args.text; - auto original_column = args.column; - auto original_line = args.line; - auto offset = original_column - 1; - if (original_line > 1) { - SmallVector<StringRef, 2> lines; - StringRef(text).split(lines, '\n'); - for (int i = 0; i < original_line - 1; i++) { - offset += lines[i].size(); - } + lldb::SBThread frame_thread = frame.GetThread(); + frame_thread.GetProcess().SetSelectedThread(frame_thread); + frame_thread.SetSelectedFrame(frame.GetFrameID()); } - std::vector<CompletionItem> targets; - - bool had_escape_prefix = - StringRef(text).starts_with(dap.configuration.commandEscapePrefix); - ReplMode completion_mode = dap.DetectReplMode(frame, text, true); - - // Handle the offset change introduced by stripping out the + const StringRef escape_prefix = dap.configuration.commandEscapePrefix; + const bool had_escape_prefix = StringRef(text).starts_with(escape_prefix); + const ReplMode repl_mode = dap.DetectReplMode(frame, text, true); + // Handle the cursor_pos change introduced by stripping out the // `command_escape_prefix`. if (had_escape_prefix) { - if (offset < - static_cast<int64_t>(dap.configuration.commandEscapePrefix.size())) { - return CompletionsResponseBody{std::move(targets)}; - } - offset -= dap.configuration.commandEscapePrefix.size(); + if (cursor_pos < escape_prefix.size()) + return CompletionsResponseBody{}; + + cursor_pos -= escape_prefix.size(); } // While the user is typing then we likely have an incomplete input and cannot // reliably determine the precise intent (command vs variable), try completing // the text as both a command and variable expression, if applicable. const std::string expr_prefix = "expression -- "; - std::array<std::tuple<ReplMode, std::string, uint64_t>, 2> exprs = { - {std::make_tuple(ReplMode::Command, text, offset), + const std::array<std::tuple<ReplMode, std::string, uint64_t>, 2> exprs = { + {std::make_tuple(ReplMode::Command, text, cursor_pos), std::make_tuple(ReplMode::Variable, expr_prefix + text, - offset + expr_prefix.size())}}; + cursor_pos + expr_prefix.size())}}; + + CompletionsResponseBody response; + std::vector<CompletionItem> &targets = response.targets; + lldb::SBCommandInterpreter interpreter = dap.debugger.GetCommandInterpreter(); for (const auto &[mode, line, cursor] : exprs) { - if (completion_mode != ReplMode::Auto && completion_mode != mode) + if (repl_mode != ReplMode::Auto && repl_mode != mode) continue; lldb::SBStringList matches; lldb::SBStringList descriptions; - if (!dap.debugger.GetCommandInterpreter().HandleCompletionWithDescriptions( - line.c_str(), cursor, 0, 100, matches, descriptions)) + if (!interpreter.HandleCompletionWithDescriptions( + line.c_str(), cursor, 0, 50, matches, descriptions)) continue; // The first element is the common substring after the cursor position for // all the matches. The rest of the elements are the matches so ignore the // first result. - for (size_t i = 1; i < matches.GetSize(); i++) { - std::string match = matches.GetStringAtIndex(i); - std::string description = descriptions.GetStringAtIndex(i); + for (uint32_t i = 1; i < matches.GetSize(); i++) { + const StringRef match = matches.GetStringAtIndex(i); + const StringRef description = descriptions.GetStringAtIndex(i); - CompletionItem item; StringRef match_ref = match; - for (StringRef commit_point : {".", "->"}) { - if (match_ref.contains(commit_point)) { - match_ref = match_ref.rsplit(commit_point).second; + for (const StringRef commit_point : {".", "->"}) { + if (const size_t pos = match_ref.rfind(commit_point); + pos != StringRef::npos) { + match_ref = match_ref.substr(pos + commit_point.size()); } } - item.text = match_ref; - if (description.empty()) - item.label = match; - else - item.label = match + " -- " + description; + CompletionItem item; + item.text = match_ref; + item.label = match; + if (!description.empty()) + item.detail = description; targets.emplace_back(std::move(item)); } } - return CompletionsResponseBody{std::move(targets)}; + return response; } } // namespace lldb_dap diff --git a/lldb/tools/lldb-dap/LLDBUtils.cpp b/lldb/tools/lldb-dap/LLDBUtils.cpp index 8a651249aee02..e7407e9da9efb 100644 --- a/lldb/tools/lldb-dap/LLDBUtils.cpp +++ b/lldb/tools/lldb-dap/LLDBUtils.cpp @@ -19,6 +19,7 @@ #include "lldb/lldb-defines.h" #include "lldb/lldb-enumerations.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Error.h" #include "llvm/Support/JSON.h" #include "llvm/Support/raw_ostream.h" @@ -277,4 +278,38 @@ lldb::SBLineEntry GetLineEntryForAddress(lldb::SBTarget &target, return sc.GetLineEntry(); } +std::optional<size_t> UTF16CodeunitToBytes(llvm::StringRef line, + uint32_t utf16_codeunits) { + size_t bytes_count = 0; + size_t utf16_seen_cu = 0; + size_t idx = 0; + const size_t line_size = line.size(); + + while (idx < line_size && utf16_seen_cu < utf16_codeunits) { + const char first_char = line[idx]; + const auto num_bytes = llvm::getNumBytesForUTF8(first_char); + + if (num_bytes == 4) { + utf16_seen_cu += 2; + } else if (num_bytes < 4) { + utf16_seen_cu += 1; + } else { + // getNumBytesForUTF8 may return bytes greater than 4 this is not valid + // UTF8 + return std::nullopt; + } + + idx += num_bytes; + if (utf16_seen_cu <= utf16_codeunits) { + bytes_count = idx; + } else { + // We are in the middle of a codepoint or the utf16_codeunits ends in the + // middle of a codepoint. + return std::nullopt; + } + } + + return bytes_count; +} + } // namespace lldb_dap diff --git a/lldb/tools/lldb-dap/LLDBUtils.h b/lldb/tools/lldb-dap/LLDBUtils.h index 9545654504e8d..19174ba59654d 100644 --- a/lldb/tools/lldb-dap/LLDBUtils.h +++ b/lldb/tools/lldb-dap/LLDBUtils.h @@ -248,6 +248,24 @@ llvm::Error ToError(const lldb::SBError &error, bool show_user = true); /// Provides the string value if this data structure is a string type. std::string GetStringValue(const lldb::SBStructuredData &data); +/// Converts UTF16 column codeunits to bytes. +/// we are recieving utf8 from the specification. +/// UTF16 codunit size => 2 bytes. +/// UTF8 codunit size => 1 byte. +/// Example +/// | info | info | utf16_cu | size in bytes | +/// | fake f | ƒ | 1 | 2 | +/// | fake c | ç | 1 | 3 | +/// | poop char| 💩 | 2 | 4 | +/// +/// so with inputs string of +/// (`ƒ💩`, 3) we have 3 utf16_u and ( 2 + 4 ) bytes. +/// (`ƒ💩`, 2) we have 3 utf16_u and ( 2 + 4 ) bytes but the position is in +/// between the 💩 char so we return null since the codepoint is not complete. +/// +/// see https://utf8everywhere.org/#characters for more info. +std::optional<size_t> UTF16CodeunitToBytes(llvm::StringRef line, + uint32_t utf16_codeunits); } // namespace lldb_dap #endif diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h index 9d5bdf1efe94c..f7b7bf2e4dda4 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h @@ -313,7 +313,7 @@ bool fromJSON(const llvm::json::Value &, LaunchRequestArguments &, /// field is required. using LaunchResponse = VoidResponse; -#define LLDB_DAP_INVALID_PORT -1 +#define LLDB_DAP_INVALID_PORT (-1) /// An invalid 'frameId' default value. #define LLDB_DAP_INVALID_FRAME_ID UINT64_MAX @@ -407,11 +407,11 @@ struct CompletionsArguments { /// The position within `text` for which to determine the completion /// proposals. It is measured in UTF-16 code units and the client capability /// `columnsStartAt1` determines whether it is 0- or 1-based. - int64_t column = 0; + uint32_t column = LLDB_INVALID_COLUMN_NUMBER; /// A line for which to determine the completion proposals. If missing the /// first line of the text is assumed. - int64_t line = 0; + uint32_t line = 1; }; bool fromJSON(const llvm::json::Value &, CompletionsArguments &, llvm::json::Path); diff --git a/lldb/unittests/DAP/LLDBUtilsTest.cpp b/lldb/unittests/DAP/LLDBUtilsTest.cpp index 4f619af2b136e..ade452baf12ba 100644 --- a/lldb/unittests/DAP/LLDBUtilsTest.cpp +++ b/lldb/unittests/DAP/LLDBUtilsTest.cpp @@ -9,6 +9,7 @@ #include "LLDBUtils.h" #include "lldb/API/SBError.h" #include "lldb/API/SBStructuredData.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Error.h" #include "gtest/gtest.h" @@ -63,3 +64,30 @@ TEST(LLDBUtilsTest, ToError) { std::string error_message = toString(std::move(llvm_error)); EXPECT_EQ(error_message, "Test error message"); } + +TEST(LLDBUtilsTest, UTF16Codeunits) { + using Expect = std::optional<size_t>; + + EXPECT_EQ(UTF16CodeunitToBytes("a", 0), Expect{0}); + EXPECT_EQ(UTF16CodeunitToBytes("some word", 4), Expect{4}); + EX... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/176796 _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
