llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-lldb Author: John Harrison (ashgti) <details> <summary>Changes</summary> I ran into this while debugging a different issue, but when we calcualte the 'value' field of a variable we were not ensuring the contents were valid utf8. If assertions are enabled then llvm::json::Value will assert that the string contains invalid utf8. To address this I added a wrapper type (`lldb_dap::protocol::SanitizedString`) that can be used as a thin wrapper around `std::string` to ensure a field contains valid utf8. I've used it in a handful of places that I believe could contain invalid utf8 output, but we can add it to other places as well in the protocol types. --- Full diff: https://github.com/llvm/llvm-project/pull/181261.diff 7 Files Affected: - (modified) lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py (+33-13) - (modified) lldb/test/API/tools/lldb-dap/variables/main.cpp (+4) - (modified) lldb/tools/lldb-dap/DAP.cpp (+4-3) - (modified) lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp (+17) - (modified) lldb/tools/lldb-dap/Protocol/ProtocolBase.h (+19) - (modified) lldb/tools/lldb-dap/Protocol/ProtocolRequests.h (+1-1) - (modified) lldb/tools/lldb-dap/Protocol/ProtocolTypes.h (+2-1) ``````````diff diff --git a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py index 1dbb0143e7a55..f737d41eaecec 100644 --- a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py +++ b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py @@ -188,6 +188,8 @@ def do_test_scopes_variables_setVariable_evaluate( }, "readOnly": True, }, + "valid_str": {}, + "malformed_str": {}, "x": {"equals": {"type": "int"}}, } @@ -340,9 +342,9 @@ def do_test_scopes_variables_setVariable_evaluate( verify_locals["argc"]["equals"]["value"] = "123" verify_locals["pt"]["children"]["x"]["equals"]["value"] = "111" - verify_locals["x @ main.cpp:19"] = {"equals": {"type": "int", "value": "89"}} - verify_locals["x @ main.cpp:21"] = {"equals": {"type": "int", "value": "42"}} - verify_locals["x @ main.cpp:23"] = {"equals": {"type": "int", "value": "72"}} + verify_locals["x @ main.cpp:23"] = {"equals": {"type": "int", "value": "89"}} + verify_locals["x @ main.cpp:25"] = {"equals": {"type": "int", "value": "42"}} + verify_locals["x @ main.cpp:27"] = {"equals": {"type": "int", "value": "72"}} self.verify_variables(verify_locals, self.dap_server.get_local_variables()) @@ -350,22 +352,22 @@ def do_test_scopes_variables_setVariable_evaluate( self.assertFalse(self.set_local("x2", 9)["success"]) self.assertFalse(self.set_local("x @ main.cpp:0", 9)["success"]) - self.assertTrue(self.set_local("x @ main.cpp:19", 19)["success"]) - self.assertTrue(self.set_local("x @ main.cpp:21", 21)["success"]) - self.assertTrue(self.set_local("x @ main.cpp:23", 23)["success"]) + self.assertTrue(self.set_local("x @ main.cpp:23", 19)["success"]) + self.assertTrue(self.set_local("x @ main.cpp:25", 21)["success"]) + self.assertTrue(self.set_local("x @ main.cpp:27", 23)["success"]) # The following should have no effect - self.assertFalse(self.set_local("x @ main.cpp:23", "invalid")["success"]) + self.assertFalse(self.set_local("x @ main.cpp:27", "invalid")["success"]) - verify_locals["x @ main.cpp:19"]["equals"]["value"] = "19" - verify_locals["x @ main.cpp:21"]["equals"]["value"] = "21" - verify_locals["x @ main.cpp:23"]["equals"]["value"] = "23" + verify_locals["x @ main.cpp:23"]["equals"]["value"] = "19" + verify_locals["x @ main.cpp:25"]["equals"]["value"] = "21" + verify_locals["x @ main.cpp:27"]["equals"]["value"] = "23" self.verify_variables(verify_locals, self.dap_server.get_local_variables()) # The plain x variable shold refer to the innermost x self.assertTrue(self.set_local("x", 22)["success"]) - verify_locals["x @ main.cpp:23"]["equals"]["value"] = "22" + verify_locals["x @ main.cpp:27"]["equals"]["value"] = "22" self.verify_variables(verify_locals, self.dap_server.get_local_variables()) @@ -382,9 +384,9 @@ def do_test_scopes_variables_setVariable_evaluate( names = [var["name"] for var in locals] # The first shadowed x shouldn't have a suffix anymore verify_locals["x"] = {"equals": {"type": "int", "value": "19"}} - self.assertNotIn("x @ main.cpp:19", names) - self.assertNotIn("x @ main.cpp:21", names) self.assertNotIn("x @ main.cpp:23", names) + self.assertNotIn("x @ main.cpp:25", names) + self.assertNotIn("x @ main.cpp:27", names) self.verify_variables(verify_locals, locals) @@ -455,6 +457,22 @@ def do_test_scopes_and_evaluate_expansion(self, enableAutoVariableSummaries: boo }, "readOnly": True, }, + "valid_str": { + "equals": { + "type": "const char *", + }, + "matches": { + "value": re.compile(r'0x\w+ "πΆπ°LπΎπ CππΌπ΄π"'), + }, + }, + "malformed_str": { + "equals": { + "type": "const char *", + }, + "matches": { + "value": re.compile(r'0x\w+ "lone trailing \\x81\\x82 bytes"'), + }, + }, "x": { "equals": {"type": "int"}, "missing": ["indexedVariables"], @@ -678,6 +696,8 @@ def test_return_variables(self): "argc": {}, "argv": {}, "pt": {"readOnly": True}, + "valid_str": {}, + "malformed_str": {}, "x": {}, "return_result": {"equals": {"type": "int"}}, } diff --git a/lldb/test/API/tools/lldb-dap/variables/main.cpp b/lldb/test/API/tools/lldb-dap/variables/main.cpp index 0e363001f2f42..04fc62f02c22f 100644 --- a/lldb/test/API/tools/lldb-dap/variables/main.cpp +++ b/lldb/test/API/tools/lldb-dap/variables/main.cpp @@ -5,6 +5,7 @@ struct PointType { int y; int buffer[BUFFER_SIZE]; }; +#include <cstdio> #include <vector> int g_global = 123; static int s_global = 234; @@ -16,6 +17,9 @@ int main(int argc, char const *argv[]) { PointType pt = {11, 22, {0}}; for (int i = 0; i < BUFFER_SIZE; ++i) pt.buffer[i] = i; + const char *valid_str = "πΆπ°LπΎπ CππΌπ΄π"; + const char *malformed_str = "lone trailing \x81\x82 bytes"; + printf("print malformed utf8 %s %s\n", valid_str, malformed_str); int x = s_global - g_global - pt.y; // breakpoint 1 { int x = 42; diff --git a/lldb/tools/lldb-dap/DAP.cpp b/lldb/tools/lldb-dap/DAP.cpp index b76b05c5d1459..a629453dd5347 100644 --- a/lldb/tools/lldb-dap/DAP.cpp +++ b/lldb/tools/lldb-dap/DAP.cpp @@ -411,9 +411,10 @@ void DAP::SendOutput(OutputType o, const llvm::StringRef output) { if (end == llvm::StringRef::npos) end = output.size() - 1; llvm::json::Object event(CreateEventObject("output")); - llvm::json::Object body; - body.try_emplace("category", category); - EmplaceSafeString(body, "output", output.slice(idx, end + 1).str()); + llvm::json::Object body{ + {"category", category}, + {"output", protocol::SanitizedString(output.slice(idx, end + 1).str())}, + }; event.try_emplace("body", std::move(body)); SendJSON(llvm::json::Value(std::move(event))); idx = end + 1; diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp b/lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp index 72359214c8537..a9a929f9b88d2 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp +++ b/lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp @@ -57,6 +57,23 @@ bool fromJSON(const json::Value &Params, MessageType &M, json::Path P) { return true; } +json::Value toJSON(const SanitizedString &S) { + if (LLVM_LIKELY(llvm::json::isUTF8(std::string(S)))) + return std::string(S); + llvm::errs() << "Here!\n"; + return llvm::json::fixUTF8(std::string(S)); +} + +bool fromJSON(const llvm::json::Value &Param, SanitizedString &Str, + llvm::json::Path Path) { + if (auto s = Param.getAsString()) { + Str = *s; + return true; + } + Path.report("expected string"); + return false; +} + json::Value toJSON(const Request &R) { assert(R.seq != kCalculateSeq && "invalid seq"); diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolBase.h b/lldb/tools/lldb-dap/Protocol/ProtocolBase.h index 09ce6802b17c0..3053312c4660f 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolBase.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolBase.h @@ -20,6 +20,7 @@ #ifndef LLDB_TOOLS_LLDB_DAP_PROTOCOL_PROTOCOL_BASE_H #define LLDB_TOOLS_LLDB_DAP_PROTOCOL_PROTOCOL_BASE_H +#include "llvm/ADT/StringRef.h" #include "llvm/Support/JSON.h" #include <cstdint> #include <optional> @@ -37,6 +38,24 @@ using Id = uint64_t; /// the current session. static constexpr Id kCalculateSeq = UINT64_MAX; +/// A wrapper around a string to ensure the contents are sanitized as utf8 +/// during serialization. This value should be used for any strings that may +/// contain raw data like variable values. +class SanitizedString { +public: + SanitizedString(std::string str) : m_str(str) {} + SanitizedString(llvm::StringRef str) : m_str(str.str()) {} + SanitizedString(const char *str) : m_str(str) {} + SanitizedString() = default; + + operator std::string() const { return m_str; } + +private: + std::string m_str; +}; +llvm::json::Value toJSON(const SanitizedString &s); +bool fromJSON(const llvm::json::Value &, SanitizedString &, llvm::json::Path); + /// A client or debug adapter initiated request. struct Request { /// The command to execute. diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h index 28c9f48200e0c..af50e77ce6ab2 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h @@ -452,7 +452,7 @@ bool fromJSON(const llvm::json::Value &, SetVariableArguments &, /// Response to `setVariable` request. struct SetVariableResponseBody { /// The new value of the variable. - std::string value; + SanitizedString value; /// The type of the new value. Typically shown in the UI when hovering over /// the value. diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h index 71046d24c9787..e627b02e37bd1 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h @@ -21,6 +21,7 @@ #define LLDB_TOOLS_LLDB_DAP_PROTOCOL_PROTOCOL_TYPES_H #include "Protocol/DAPTypes.h" +#include "Protocol/ProtocolBase.h" #include "lldb/lldb-defines.h" #include "lldb/lldb-types.h" #include "llvm/ADT/DenseSet.h" @@ -942,7 +943,7 @@ struct Variable { /// its children are not yet visible. /// /// An empty string can be used if no value should be shown in the UI. - std::string value; + SanitizedString value; /// The type of the variable's value. Typically shown in the UI when hovering /// over the value. `````````` </details> https://github.com/llvm/llvm-project/pull/181261 _______________________________________________ lldb-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits
