This is an automated email from the ASF dual-hosted git repository.

reshke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 282743a7f1dcfd947e7474e5d911230df71f12bf
Author: Chris Hajas <[email protected]>
AuthorDate: Fri Sep 8 10:13:48 2023 -0700

    Optimize string creation in Orca (#16332)
    
    Previously, in order convert from a string literal into a CName (which
    is heavily used in Orca), it would build this using helper methods that
    created unnecessary data structures like DXL objects and dynamic
    strings. Instead, build this in a simpler way. This reduces optimization
    time by for queries ~12% when reading in strings to the relcache.
---
 .../libgpos/include/gpos/string/CStringStatic.h    |  2 --
 .../libgpos/include/gpos/string/CWStringBase.h     |  1 +
 .../libgpos/include/gpos/string/CWStringConst.h    |  1 +
 .../src/unittest/gpos/string/CWStringTest.cpp      |  4 +++
 .../gporca/libgpos/src/string/CWStringConst.cpp    | 34 ++++++++++++++++++++++
 src/backend/gporca/libnaucrates/src/CDXLUtils.cpp  | 11 ++++---
 6 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/src/backend/gporca/libgpos/include/gpos/string/CStringStatic.h 
b/src/backend/gporca/libgpos/include/gpos/string/CStringStatic.h
index 3e4c15d270..28b132f7df 100644
--- a/src/backend/gporca/libgpos/include/gpos/string/CStringStatic.h
+++ b/src/backend/gporca/libgpos/include/gpos/string/CStringStatic.h
@@ -15,8 +15,6 @@
 #include "gpos/base.h"
 #include "gpos/common/clibwrapper.h"
 
-#define GPOS_SZ_LENGTH(x) gpos::clib::Strlen(x)
-
 // use this character to substitute non-ASCII wide characters
 #define GPOS_WCHAR_UNPRINTABLE '.'
 
diff --git a/src/backend/gporca/libgpos/include/gpos/string/CWStringBase.h 
b/src/backend/gporca/libgpos/include/gpos/string/CWStringBase.h
index 56738a379c..199e3be039 100644
--- a/src/backend/gporca/libgpos/include/gpos/string/CWStringBase.h
+++ b/src/backend/gporca/libgpos/include/gpos/string/CWStringBase.h
@@ -16,6 +16,7 @@
 
 #define GPOS_WSZ_LENGTH(x) gpos::clib::Wcslen(x)
 #define GPOS_WSZ_STR_LENGTH(x) GPOS_WSZ_LIT(x), 
GPOS_WSZ_LENGTH(GPOS_WSZ_LIT(x))
+#define GPOS_SZ_LENGTH(x) gpos::clib::Strlen(x)
 
 #define WCHAR_EOS GPOS_WSZ_LIT('\0')
 
diff --git a/src/backend/gporca/libgpos/include/gpos/string/CWStringConst.h 
b/src/backend/gporca/libgpos/include/gpos/string/CWStringConst.h
index 99293f1ee6..448dd34e29 100644
--- a/src/backend/gporca/libgpos/include/gpos/string/CWStringConst.h
+++ b/src/backend/gporca/libgpos/include/gpos/string/CWStringConst.h
@@ -37,6 +37,7 @@ public:
        // ctors
        CWStringConst(const WCHAR *w_str_buffer);
        CWStringConst(CMemoryPool *mp, const WCHAR *w_str_buffer);
+       CWStringConst(CMemoryPool *mp, const CHAR *str_buffer);
 
        // shallow copy ctor
        CWStringConst(const CWStringConst &);
diff --git 
a/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp 
b/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp
index e561a937f0..06d0f9b524 100644
--- 
a/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp
+++ 
b/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp
@@ -341,10 +341,14 @@ CWStringTest::EresUnittest_Initialize()
        CWStringConst *pcstr1 = GPOS_NEW(mp) CWStringConst(GPOS_WSZ_LIT("123"));
        GPOS_ASSERT(pcstr1->Equals(&cstr1));
 
+       CWStringConst *pcstr2 = GPOS_NEW(mp) CWStringConst(mp, "12345");
+       GPOS_ASSERT(5 == pcstr2->Length());
+
        // cleanup
        GPOS_DELETE(pstr1);
        GPOS_DELETE(pstr2);
        GPOS_DELETE(pcstr1);
+       GPOS_DELETE(pcstr2);
 
 #endif // #ifdef GPOS_DEBUG
        return GPOS_OK;
diff --git a/src/backend/gporca/libgpos/src/string/CWStringConst.cpp 
b/src/backend/gporca/libgpos/src/string/CWStringConst.cpp
index 31f0be1546..ec4fb7d479 100644
--- a/src/backend/gporca/libgpos/src/string/CWStringConst.cpp
+++ b/src/backend/gporca/libgpos/src/string/CWStringConst.cpp
@@ -71,6 +71,40 @@ CWStringConst::CWStringConst(CMemoryPool *mp, const WCHAR 
*w_str_buffer)
        GPOS_ASSERT(IsValid());
 }
 
+//---------------------------------------------------------------------------
+//     @function:
+//             CWStringConst::CWStringConst
+//
+//     @doc:
+//             Initializes a constant string by making a copy of the given 
character buffer.
+//             The string owns the memory.
+//
+//---------------------------------------------------------------------------
+CWStringConst::CWStringConst(CMemoryPool *mp, const CHAR *str_buffer)
+       : CWStringBase(GPOS_SZ_LENGTH(str_buffer),
+                                  true  // owns_memory
+                                  ),
+         m_w_str_buffer(nullptr)
+{
+       GPOS_ASSERT(nullptr != mp);
+       GPOS_ASSERT(nullptr != str_buffer);
+
+       if (0 == m_length)
+       {
+               // string is empty
+               m_w_str_buffer = &m_empty_wcstr;
+       }
+       else
+       {
+               WCHAR *w_str_buffer = GPOS_NEW_ARRAY(mp, WCHAR, m_length + 1);
+               clib::Mbstowcs(w_str_buffer, str_buffer, m_length + 1);
+               m_w_str_buffer = w_str_buffer;
+               m_length = GPOS_WSZ_LENGTH(w_str_buffer);
+       }
+
+       GPOS_ASSERT(IsValid());
+}
+
 //---------------------------------------------------------------------------
 //     @function:
 //             CWStringConst::CWStringConst
diff --git a/src/backend/gporca/libnaucrates/src/CDXLUtils.cpp 
b/src/backend/gporca/libnaucrates/src/CDXLUtils.cpp
index 9e7e5b0401..c59ffdf6bd 100644
--- a/src/backend/gporca/libnaucrates/src/CDXLUtils.cpp
+++ b/src/backend/gporca/libnaucrates/src/CDXLUtils.cpp
@@ -1515,14 +1515,13 @@ CDXLUtils::CreateMDNameFromCharArray(CMemoryPool *mp, 
const CHAR *c)
 {
        GPOS_ASSERT(nullptr != c);
 
-       CWStringDynamic *dxl_string =
-               CDXLUtils::CreateDynamicStringFromCharArray(mp, c);
-       CMDName *md_name = GPOS_NEW(mp) CMDName(mp, dxl_string);
+       // The CMDName will take ownership of the buffer. This ensures we 
minimize allocations
+       // and improves performance for this very hot code path
+       const CWStringConst *str = GPOS_NEW(mp) CWStringConst(mp, c);
 
-       // CMDName ctor created a copy of the string
-       GPOS_DELETE(dxl_string);
+       CMDName *mdname = GPOS_NEW(mp) CMDName(str, true /* owns_memory */);
 
-       return md_name;
+       return mdname;
 }
 
 //---------------------------------------------------------------------------


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to