This is an automated email from the ASF dual-hosted git repository. reshke pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 282743a7f1dcfd947e7474e5d911230df71f12bf Author: Chris Hajas <[email protected]> AuthorDate: Fri Sep 8 10:13:48 2023 -0700 Optimize string creation in Orca (#16332) Previously, in order convert from a string literal into a CName (which is heavily used in Orca), it would build this using helper methods that created unnecessary data structures like DXL objects and dynamic strings. Instead, build this in a simpler way. This reduces optimization time by for queries ~12% when reading in strings to the relcache. --- .../libgpos/include/gpos/string/CStringStatic.h | 2 -- .../libgpos/include/gpos/string/CWStringBase.h | 1 + .../libgpos/include/gpos/string/CWStringConst.h | 1 + .../src/unittest/gpos/string/CWStringTest.cpp | 4 +++ .../gporca/libgpos/src/string/CWStringConst.cpp | 34 ++++++++++++++++++++++ src/backend/gporca/libnaucrates/src/CDXLUtils.cpp | 11 ++++--- 6 files changed, 45 insertions(+), 8 deletions(-) diff --git a/src/backend/gporca/libgpos/include/gpos/string/CStringStatic.h b/src/backend/gporca/libgpos/include/gpos/string/CStringStatic.h index 3e4c15d270..28b132f7df 100644 --- a/src/backend/gporca/libgpos/include/gpos/string/CStringStatic.h +++ b/src/backend/gporca/libgpos/include/gpos/string/CStringStatic.h @@ -15,8 +15,6 @@ #include "gpos/base.h" #include "gpos/common/clibwrapper.h" -#define GPOS_SZ_LENGTH(x) gpos::clib::Strlen(x) - // use this character to substitute non-ASCII wide characters #define GPOS_WCHAR_UNPRINTABLE '.' diff --git a/src/backend/gporca/libgpos/include/gpos/string/CWStringBase.h b/src/backend/gporca/libgpos/include/gpos/string/CWStringBase.h index 56738a379c..199e3be039 100644 --- a/src/backend/gporca/libgpos/include/gpos/string/CWStringBase.h +++ b/src/backend/gporca/libgpos/include/gpos/string/CWStringBase.h @@ -16,6 +16,7 @@ #define GPOS_WSZ_LENGTH(x) gpos::clib::Wcslen(x) #define GPOS_WSZ_STR_LENGTH(x) GPOS_WSZ_LIT(x), GPOS_WSZ_LENGTH(GPOS_WSZ_LIT(x)) +#define GPOS_SZ_LENGTH(x) gpos::clib::Strlen(x) #define WCHAR_EOS GPOS_WSZ_LIT('\0') diff --git a/src/backend/gporca/libgpos/include/gpos/string/CWStringConst.h b/src/backend/gporca/libgpos/include/gpos/string/CWStringConst.h index 99293f1ee6..448dd34e29 100644 --- a/src/backend/gporca/libgpos/include/gpos/string/CWStringConst.h +++ b/src/backend/gporca/libgpos/include/gpos/string/CWStringConst.h @@ -37,6 +37,7 @@ public: // ctors CWStringConst(const WCHAR *w_str_buffer); CWStringConst(CMemoryPool *mp, const WCHAR *w_str_buffer); + CWStringConst(CMemoryPool *mp, const CHAR *str_buffer); // shallow copy ctor CWStringConst(const CWStringConst &); diff --git a/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp b/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp index e561a937f0..06d0f9b524 100644 --- a/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp +++ b/src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp @@ -341,10 +341,14 @@ CWStringTest::EresUnittest_Initialize() CWStringConst *pcstr1 = GPOS_NEW(mp) CWStringConst(GPOS_WSZ_LIT("123")); GPOS_ASSERT(pcstr1->Equals(&cstr1)); + CWStringConst *pcstr2 = GPOS_NEW(mp) CWStringConst(mp, "12345"); + GPOS_ASSERT(5 == pcstr2->Length()); + // cleanup GPOS_DELETE(pstr1); GPOS_DELETE(pstr2); GPOS_DELETE(pcstr1); + GPOS_DELETE(pcstr2); #endif // #ifdef GPOS_DEBUG return GPOS_OK; diff --git a/src/backend/gporca/libgpos/src/string/CWStringConst.cpp b/src/backend/gporca/libgpos/src/string/CWStringConst.cpp index 31f0be1546..ec4fb7d479 100644 --- a/src/backend/gporca/libgpos/src/string/CWStringConst.cpp +++ b/src/backend/gporca/libgpos/src/string/CWStringConst.cpp @@ -71,6 +71,40 @@ CWStringConst::CWStringConst(CMemoryPool *mp, const WCHAR *w_str_buffer) GPOS_ASSERT(IsValid()); } +//--------------------------------------------------------------------------- +// @function: +// CWStringConst::CWStringConst +// +// @doc: +// Initializes a constant string by making a copy of the given character buffer. +// The string owns the memory. +// +//--------------------------------------------------------------------------- +CWStringConst::CWStringConst(CMemoryPool *mp, const CHAR *str_buffer) + : CWStringBase(GPOS_SZ_LENGTH(str_buffer), + true // owns_memory + ), + m_w_str_buffer(nullptr) +{ + GPOS_ASSERT(nullptr != mp); + GPOS_ASSERT(nullptr != str_buffer); + + if (0 == m_length) + { + // string is empty + m_w_str_buffer = &m_empty_wcstr; + } + else + { + WCHAR *w_str_buffer = GPOS_NEW_ARRAY(mp, WCHAR, m_length + 1); + clib::Mbstowcs(w_str_buffer, str_buffer, m_length + 1); + m_w_str_buffer = w_str_buffer; + m_length = GPOS_WSZ_LENGTH(w_str_buffer); + } + + GPOS_ASSERT(IsValid()); +} + //--------------------------------------------------------------------------- // @function: // CWStringConst::CWStringConst diff --git a/src/backend/gporca/libnaucrates/src/CDXLUtils.cpp b/src/backend/gporca/libnaucrates/src/CDXLUtils.cpp index 9e7e5b0401..c59ffdf6bd 100644 --- a/src/backend/gporca/libnaucrates/src/CDXLUtils.cpp +++ b/src/backend/gporca/libnaucrates/src/CDXLUtils.cpp @@ -1515,14 +1515,13 @@ CDXLUtils::CreateMDNameFromCharArray(CMemoryPool *mp, const CHAR *c) { GPOS_ASSERT(nullptr != c); - CWStringDynamic *dxl_string = - CDXLUtils::CreateDynamicStringFromCharArray(mp, c); - CMDName *md_name = GPOS_NEW(mp) CMDName(mp, dxl_string); + // The CMDName will take ownership of the buffer. This ensures we minimize allocations + // and improves performance for this very hot code path + const CWStringConst *str = GPOS_NEW(mp) CWStringConst(mp, c); - // CMDName ctor created a copy of the string - GPOS_DELETE(dxl_string); + CMDName *mdname = GPOS_NEW(mp) CMDName(str, true /* owns_memory */); - return md_name; + return mdname; } //--------------------------------------------------------------------------- --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
