https://github.com/JDevlieghere created 
https://github.com/llvm/llvm-project/pull/152212

This conditionally reimplements `PythonString::AsUTF8` using 
`PyUnicode_AsUTF8String` instead of `PyUnicode_AsUTF8AndSize`. The latter is 
slightly more efficient because it caches the result, which also means we can 
return an `llvm::StringRef` instead of having to allocate a `std::string`.

We pick the most efficient one depending on whether or not we're targeting the 
Python 3.10 stable API, however we now always return the result as a 
`std::string`.

>From f47d48db4be67ebeef822ad58a720aa45cfcd9c7 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jo...@devlieghere.com>
Date: Tue, 5 Aug 2025 14:49:30 -0700
Subject: [PATCH] [lldb] Support the Python stable C API in
 PythonString::AsUTF8

This conditionally reimplements PythonString::AsUTF8 using
PyUnicode_AsUTF8String instead of PyUnicode_AsUTF8AndSize. The latter is
slightly more efficient because it caches the result, which also means
we can return a StringRef instead of having to allocate a std::string.

We pick the most efficient one depending on whether or not we're
targeting the Python 3.10 stable API, however we now always return the
result as a std::string.
---
 lldb/bindings/python/python-wrapper.swig      | 14 +++----
 .../Python/PythonDataObjects.cpp              | 38 ++++++++++++-------
 .../Python/PythonDataObjects.h                |  4 +-
 .../Python/ScriptInterpreterPython.cpp        |  8 ++--
 4 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/lldb/bindings/python/python-wrapper.swig 
b/lldb/bindings/python/python-wrapper.swig
index 2c30d536a753d..7f00ef43913f8 100644
--- a/lldb/bindings/python/python-wrapper.swig
+++ b/lldb/bindings/python/python-wrapper.swig
@@ -173,7 +173,7 @@ bool 
lldb_private::python::SWIGBridge::LLDBSwigPythonCallTypeScript(
   else
     result = pfunc(value_arg, dict, SWIGBridge::ToSWIGWrapper(*options_sp));
 
-  retval = result.Str().GetString().str();
+  retval = result.Str().GetString();
 
   return true;
 }
@@ -604,7 +604,7 @@ 
lldb_private::python::SWIGBridge::LLDBSwigPythonGetRepeatCommandForScriptedComma
   if (result.IsNone())
     return std::nullopt;
 
-  return result.Str().GetString().str();
+  return result.Str().GetString();
 }
 
 StructuredData::DictionarySP
@@ -807,7 +807,7 @@ bool 
lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordProcess(
 
   auto result = pfunc(SWIGBridge::ToSWIGWrapper(process), dict);
 
-  output = result.Str().GetString().str();
+  output = result.Str().GetString();
 
   return true;
 }
@@ -831,7 +831,7 @@ std::optional<std::string> 
lldb_private::python::SWIGBridge::LLDBSWIGPythonRunSc
 
   auto result = pfunc(SWIGBridge::ToSWIGWrapper(std::move(thread)), dict);
 
-  return result.Str().GetString().str();
+  return result.Str().GetString();
 }
 
 bool lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordTarget(
@@ -854,7 +854,7 @@ bool 
lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordTarget(
 
   auto result = pfunc(SWIGBridge::ToSWIGWrapper(target), dict);
 
-  output = result.Str().GetString().str();
+  output = result.Str().GetString();
 
   return true;
 }
@@ -878,7 +878,7 @@ std::optional<std::string> 
lldb_private::python::SWIGBridge::LLDBSWIGPythonRunSc
 
   auto result = pfunc(SWIGBridge::ToSWIGWrapper(std::move(frame)), dict);
 
-  return result.Str().GetString().str();
+  return result.Str().GetString();
 }
 
 bool lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordValue(
@@ -901,7 +901,7 @@ bool 
lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordValue(
 
   auto result = pfunc(SWIGBridge::ToSWIGWrapper(value), dict);
 
-  output = result.Str().GetString().str();
+  output = result.Str().GetString();
 
   return true;
 }
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp 
b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
index 4a45673a6ebbc..7845e56c8c05c 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
@@ -405,28 +405,40 @@ bool PythonString::Check(PyObject *py_obj) {
   return false;
 }
 
-llvm::StringRef PythonString::GetString() const {
+std::string PythonString::GetString() const {
   auto s = AsUTF8();
   if (!s) {
     llvm::consumeError(s.takeError());
-    return llvm::StringRef("");
+    return "";
   }
   return s.get();
 }
 
-Expected<llvm::StringRef> PythonString::AsUTF8() const {
+Expected<std::string> PythonString::AsUTF8() const {
   if (!IsValid())
     return nullDeref();
 
-  Py_ssize_t size;
-  const char *data;
+  // PyUnicode_AsUTF8AndSize caches the UTF-8 representation of the string in
+  // the Unicode object, making it more efficient than PyUnicode_AsUTF8String.
+#if defined(Py_LIMITED_API) && (Py_LIMITED_API < 0x030a0000)
+  PyObject *py_bytes = PyUnicode_AsUTF8String(m_py_obj);
+  if (!py_bytes)
+    return exception();
 
-  data = PyUnicode_AsUTF8AndSize(m_py_obj, &size);
+  auto release_py_str =
+      llvm::make_scope_exit([py_bytes] { Py_DECREF(py_bytes); });
 
-  if (!data)
+  Py_ssize_t size = PyBytes_Size(py_bytes);
+  const char *str = PyBytes_AsString(py_bytes);
+#else
+  Py_ssize_t size;
+  const char *str = PyUnicode_AsUTF8AndSize(m_py_obj, &size);
+#endif
+
+  if (!str)
     return exception();
 
-  return llvm::StringRef(data, size);
+  return std::string(str, size);
 }
 
 size_t PythonString::GetSize() const {
@@ -1248,12 +1260,12 @@ class TextPythonFile : public PythonIOFile {
       // EOF
       return Status();
     }
-    auto stringref = pystring.get().AsUTF8();
-    if (!stringref)
+    auto str = pystring.get().AsUTF8();
+    if (!str)
       // Cloning since the wrapped exception may still reference the PyThread.
-      return Status::FromError(stringref.takeError()).Clone();
-    num_bytes = stringref.get().size();
-    memcpy(buf, stringref.get().begin(), num_bytes);
+      return Status::FromError(str.takeError()).Clone();
+    num_bytes = str->size();
+    memcpy(buf, str->c_str(), num_bytes);
     return Status();
   }
 };
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h 
b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
index 45bb499248329..9586eedd03176 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
@@ -460,9 +460,9 @@ class PythonString : public TypedPythonObject<PythonString> 
{
 
   static bool Check(PyObject *py_obj);
 
-  llvm::StringRef GetString() const; // safe, empty string on error
+  std::string GetString() const; // safe, empty string on error
 
-  llvm::Expected<llvm::StringRef> AsUTF8() const;
+  llvm::Expected<std::string> AsUTF8() const;
 
   size_t GetSize() const;
 
diff --git 
a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp 
b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
index 24d604f22a765..b98ff035e5798 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
@@ -2715,8 +2715,8 @@ bool 
ScriptInterpreterPythonImpl::GetShortHelpForCommandObject(
 
   if (py_return.IsAllocated() && PythonString::Check(py_return.get())) {
     PythonString py_string(PyRefType::Borrowed, py_return.get());
-    llvm::StringRef return_data(py_string.GetString());
-    dest.assign(return_data.data(), return_data.size());
+    std::string return_data = py_string.GetString();
+    dest.assign(return_data.c_str(), return_data.size());
     return true;
   }
 
@@ -2996,8 +2996,8 @@ bool 
ScriptInterpreterPythonImpl::GetLongHelpForCommandObject(
   bool got_string = false;
   if (py_return.IsAllocated() && PythonString::Check(py_return.get())) {
     PythonString str(PyRefType::Borrowed, py_return.get());
-    llvm::StringRef str_data(str.GetString());
-    dest.assign(str_data.data(), str_data.size());
+    std::string str_data = str.GetString();
+    dest.assign(str_data.c_str(), str_data.size());
     got_string = true;
   }
 

_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to