This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit b1d2e27ef858976fa6c5af5b20ea8741d4f5fc3c Author: David Kimura <[email protected]> AuthorDate: Mon Jul 25 10:46:57 2022 -0700 [ORCA] Fix duplicate stats reset (#13817) Following scenario led to crash due to missing statistics: ```sql CREATE TABLE t1 (c11 varchar, c12 numeric(15,4)); CREATE TABLE t2 (c2 varchar); CREATE TABLE t3 (c3 varchar); SET allow_system_table_mods=true; UPDATE pg_class SET relpages = 97399::int, reltuples = 9106730.0::real, relallvisible = 0::int WHERE relname = 't1'; UPDATE pg_class SET relpages = 68553::int, reltuples = 7054520.0::real, relallvisible = 0::int WHERE relname = 't2'; SET optimizer_join_order=exhaustive; SELECT (SELECT c11 FROM t1) AS column1, (SELECT sum(c12) FROM t1 INNER JOIN t2 ON c11 = c2 INNER JOIN t3 ON c2 = c3 INNER JOIN t3 a1 ON a1.c3 = a2.c3 LEFT OUTER JOIN t3 a3 ON a1.c3 = a3.c3 LEFT OUTER JOIN t3 a4 ON a1.c3 = a4.c3 ) AS column2 FROM t3 a2; ``` Underlying cause is due to the fact that derive and reset for group stats was not symmetric. In the case of "exhaustive", multiple xforms may be run on the same group that derive stats before applying and reset stats afterward. Prior to this commit it was possible to have a group with "dirty" stats where child nodes may have been cleaned up, but the group still tecnically has stats object. If it is a duplicate of another group then it was possible to "trick" the other group into believing that the stats were already derived. That fake news could lead to a crash. Co-authored-by: Jingyu Wang <[email protected]> --- .../gporca/libgpopt/include/gpopt/search/CMemo.h | 3 - src/backend/gporca/libgpopt/src/search/CGroup.cpp | 5 + src/backend/gporca/libgpopt/src/search/CMemo.cpp | 2 - .../gporca/libgpopt/src/xforms/CJoinOrderDP.cpp | 7 ++ src/backend/gporca/server/CMakeLists.txt | 1 + .../include/unittest/gpopt/base/CGroupTest.h | 44 ++++++++ src/backend/gporca/server/src/startup/main.cpp | 6 +- .../server/src/unittest/gpopt/base/CGroupTest.cpp | 121 +++++++++++++++++++++ src/test/regress/expected/bfv_statistic.out | 29 +++++ .../regress/expected/bfv_statistic_optimizer.out | 31 ++++++ src/test/regress/sql/bfv_statistic.sql | 31 ++++++ 11 files changed, 273 insertions(+), 7 deletions(-) diff --git a/src/backend/gporca/libgpopt/include/gpopt/search/CMemo.h b/src/backend/gporca/libgpopt/include/gpopt/search/CMemo.h index 32f3b50e03..b29bb7c3e2 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/search/CMemo.h +++ b/src/backend/gporca/libgpopt/include/gpopt/search/CMemo.h @@ -167,12 +167,9 @@ public: // print memo to output logger void Trace(); -#ifdef GPOS_DEBUG // get group by id CGroup *Pgroup(ULONG id); -#endif // GPOS_DEBUG - }; // class CMemo } // namespace gpopt diff --git a/src/backend/gporca/libgpopt/src/search/CGroup.cpp b/src/backend/gporca/libgpopt/src/search/CGroup.cpp index a068645b96..35877d8434 100644 --- a/src/backend/gporca/libgpopt/src/search/CGroup.cpp +++ b/src/backend/gporca/libgpopt/src/search/CGroup.cpp @@ -1905,6 +1905,11 @@ CGroup::FResetStats() return true; } + if (FDuplicateGroup()) + { + return PgroupDuplicate()->FResetStats(); + } + BOOL fResetStats = false; if (FHasNewLogicalOperators()) { diff --git a/src/backend/gporca/libgpopt/src/search/CMemo.cpp b/src/backend/gporca/libgpopt/src/search/CMemo.cpp index 900da582b7..ff5d7f1e23 100644 --- a/src/backend/gporca/libgpopt/src/search/CMemo.cpp +++ b/src/backend/gporca/libgpopt/src/search/CMemo.cpp @@ -411,7 +411,6 @@ CMemo::PexprExtractPlan(CMemoryPool *mp, CGroup *pgroupRoot, } -#ifdef GPOS_DEBUG //--------------------------------------------------------------------------- // @function: // CMemo::Pgroup @@ -436,7 +435,6 @@ CMemo::Pgroup(ULONG id) return nullptr; } -#endif // GPOS_DEBUG //--------------------------------------------------------------------------- diff --git a/src/backend/gporca/libgpopt/src/xforms/CJoinOrderDP.cpp b/src/backend/gporca/libgpopt/src/xforms/CJoinOrderDP.cpp index 7badd35f82..bbd55ff9ca 100644 --- a/src/backend/gporca/libgpopt/src/xforms/CJoinOrderDP.cpp +++ b/src/backend/gporca/libgpopt/src/xforms/CJoinOrderDP.cpp @@ -625,6 +625,13 @@ CJoinOrderDP::DCost(CExpression *pexpr) const ULONG arity = pexpr->Arity(); if (0 == arity) { + if (nullptr == pexpr->Pstats()) + { + GPOS_RAISE( + CException::ExmaInvalid, CException::ExmiAssert, + GPOS_WSZ_LIT("stats were not derived on an input component")); + } + // leaf operator, use its estimated number of rows as cost dCost = CDouble(pexpr->Pstats()->Rows()); } diff --git a/src/backend/gporca/server/CMakeLists.txt b/src/backend/gporca/server/CMakeLists.txt index d764dc5bd4..3c175fa9ce 100644 --- a/src/backend/gporca/server/CMakeLists.txt +++ b/src/backend/gporca/server/CMakeLists.txt @@ -526,6 +526,7 @@ add_orca_test(CBindingTest) add_orca_test(CEngineTest) add_orca_test(CEquivalenceClassesTest) add_orca_test(CExpressionTest) +add_orca_test(CGroupTest) add_orca_test(CJoinOrderTest) add_orca_test(CKeyCollectionTest) add_orca_test(CMaxCardTest) diff --git a/src/backend/gporca/server/include/unittest/gpopt/base/CGroupTest.h b/src/backend/gporca/server/include/unittest/gpopt/base/CGroupTest.h new file mode 100644 index 0000000000..b96699560b --- /dev/null +++ b/src/backend/gporca/server/include/unittest/gpopt/base/CGroupTest.h @@ -0,0 +1,44 @@ +//--------------------------------------------------------------------------- +// Greenplum Database +// Copyright (C) 2022 VMware Inc. +// +// @filename: +// CGroupTest.h +// +// @doc: +// Test for CGroup +//--------------------------------------------------------------------------- +#ifndef GPOPT_CGroupTest_H +#define GPOPT_CGroupTest_H + + +#include "gpos/base.h" +#include "gpos/common/CDynamicPtrArray.h" + +#include "gpopt/operators/CExpression.h" + +namespace gpopt +{ +//--------------------------------------------------------------------------- +// @class: +// CGroupTest +// +// @doc: +// Unittests +// +//--------------------------------------------------------------------------- +class CGroupTest +{ +public: + // main driver + static GPOS_RESULT EresUnittest(); + + static GPOS_RESULT EresUnittest_FResetStatsOnCGroupWithDuplicateGroup(); + +}; // class CGroupTest +} // namespace gpopt + +#endif // !GPOPT_CGroupTest_H + + +// EOF diff --git a/src/backend/gporca/server/src/startup/main.cpp b/src/backend/gporca/server/src/startup/main.cpp index 5e0629d9c9..2f85235c88 100644 --- a/src/backend/gporca/server/src/startup/main.cpp +++ b/src/backend/gporca/server/src/startup/main.cpp @@ -50,6 +50,7 @@ #include "unittest/gpopt/base/CDistributionSpecTest.h" #include "unittest/gpopt/base/CEquivalenceClassesTest.h" #include "unittest/gpopt/base/CFunctionalDependencyTest.h" +#include "unittest/gpopt/base/CGroupTest.h" #include "unittest/gpopt/base/CKeyCollectionTest.h" #include "unittest/gpopt/base/CMaxCardTest.h" #include "unittest/gpopt/base/COrderSpecTest.h" @@ -166,8 +167,9 @@ static gpos::CUnittest rgut[] = { GPOS_UNITTEST_STD(CSubqueryHandlerTest), GPOS_UNITTEST_STD(CBindingTest), GPOS_UNITTEST_STD(CXformRightOuterJoin2HashJoinTest), GPOS_UNITTEST_STD(CEngineTest), GPOS_UNITTEST_STD(CEquivalenceClassesTest), - GPOS_UNITTEST_STD(CExpressionTest), GPOS_UNITTEST_STD(CJoinOrderTest), - GPOS_UNITTEST_STD(CKeyCollectionTest), GPOS_UNITTEST_STD(CMaxCardTest), + GPOS_UNITTEST_STD(CGroupTest), GPOS_UNITTEST_STD(CExpressionTest), + GPOS_UNITTEST_STD(CJoinOrderTest), GPOS_UNITTEST_STD(CKeyCollectionTest), + GPOS_UNITTEST_STD(CMaxCardTest), GPOS_UNITTEST_STD(CFunctionalDependencyTest), GPOS_UNITTEST_STD(CNameTest), GPOS_UNITTEST_STD(COrderSpecTest), GPOS_UNITTEST_STD(CRangeTest), GPOS_UNITTEST_STD(CPredicateUtilsTest), diff --git a/src/backend/gporca/server/src/unittest/gpopt/base/CGroupTest.cpp b/src/backend/gporca/server/src/unittest/gpopt/base/CGroupTest.cpp new file mode 100644 index 0000000000..cb5355964e --- /dev/null +++ b/src/backend/gporca/server/src/unittest/gpopt/base/CGroupTest.cpp @@ -0,0 +1,121 @@ +//--------------------------------------------------------------------------- +// Greenplum Database +// Copyright (C) 2022 VMware Inc. +// +// @filename: +// CGroupTest.cpp +// +// @doc: +// Test for CGroup +//--------------------------------------------------------------------------- +#include "unittest/gpopt/base/CGroupTest.h" + +#include "gpos/error/CAutoTrace.h" +#include "gpos/task/CAutoTraceFlag.h" + +#include "gpopt/base/CUtils.h" +#include "gpopt/mdcache/CMDCache.h" +#include "gpopt/operators/CLogicalNAryJoin.h" +#include "gpopt/search/CGroup.h" +#include "gpopt/search/CGroupProxy.h" +#include "gpopt/search/CMemo.h" + +#include "unittest/base.h" +#include "unittest/gpopt/CSubqueryTestUtils.h" + +//--------------------------------------------------------------------------- +// @function: +// CGroupTest::EresUnittest +// +// @doc: +// Unittest for cgroup +// +//--------------------------------------------------------------------------- +GPOS_RESULT +CGroupTest::EresUnittest() +{ + CUnittest rgut[] = { + GPOS_UNITTEST_FUNC(EresUnittest_FResetStatsOnCGroupWithDuplicateGroup), + }; + + return CUnittest::EresExecute(rgut, GPOS_ARRAY_SIZE(rgut)); +} + + +static CExpression * +InsertGroupIntoMemo(CMemo *pmemo, CMemoryPool *mp) +{ + CExpression *pexprGet = CTestUtils::PexprLogicalGet(mp); + pexprGet->Pop()->AddRef(); + CGroupExpression *pgexpr = GPOS_NEW(mp) + CGroupExpression(mp, pexprGet->Pop(), GPOS_NEW(mp) CGroupArray(mp), + CXform::ExfInvalid /*exfidOrigin*/, + nullptr /*pgexprOrigin*/, false /*fIntermediate*/); + + pmemo->PgroupInsert(nullptr, pexprGet, pgexpr); + + return pexprGet; +} + + +GPOS_RESULT +CGroupTest::EresUnittest_FResetStatsOnCGroupWithDuplicateGroup() +{ + CAutoMemoryPool amp; + CMemoryPool *mp = amp.Pmp(); + + // setup a file-based provider + CMDProviderMemory *pmdp = CTestUtils::m_pmdpf; + pmdp->AddRef(); + CMDAccessor mda(mp, CMDCache::Pcache()); + mda.RegisterProvider(CTestUtils::m_sysidDefault, pmdp); + + CAutoOptCtxt aoc(mp, &mda, nullptr, /* pceeval */ + CTestUtils::GetCostModel(mp)); + + CMemo *pmemo = GPOS_NEW(mp) CMemo(mp); + + CExpression *pexprGet1 = InsertGroupIntoMemo(pmemo, mp); + CExpression *pexprGet2 = InsertGroupIntoMemo(pmemo, mp); + + CGroupExpression *pgexprFirst = nullptr; + { + CGroupProxy gp(pmemo->Pgroup(0)); + pgexprFirst = gp.PgexprFirst(); + } + + // mark group (1) as duplicates of group (0) + CMemo::MarkDuplicates(pmemo->Pgroup(0), pmemo->Pgroup(1)); + + CExpressionHandle exprhdl(mp); + exprhdl.Attach(pgexprFirst); + exprhdl.DeriveStats(mp, mp, nullptr, nullptr); + + // After deriving stats on group 0, we should now have idential stats + // reference for group (0) and group (1) + if (pmemo->Pgroup(0)->Pstats() != pmemo->Pgroup(1)->Pstats()) + { + // stat references should be identical for duplicate groups + return GPOS_FAILED; + } + + pmemo->Pgroup(0)->FResetStats(); + + // After resetting stats on group (0), we should have also reset stats on + // group (1) + if (pmemo->Pgroup(0)->Pstats() != nullptr || + pmemo->Pgroup(1)->Pstats() != nullptr) + { + GPOS_DELETE(pmemo); + pexprGet1->Release(); + pexprGet2->Release(); + return GPOS_FAILED; + } + + GPOS_DELETE(pmemo); + pexprGet1->Release(); + pexprGet2->Release(); + return GPOS_OK; +} + +// EOF diff --git a/src/test/regress/expected/bfv_statistic.out b/src/test/regress/expected/bfv_statistic.out index 025671456e..1d3b1ae5e0 100644 --- a/src/test/regress/expected/bfv_statistic.out +++ b/src/test/regress/expected/bfv_statistic.out @@ -444,3 +444,32 @@ where tablename = 'uniformtest'; 40-60 | <= 5 | >= 95 (1 row) +-- ORCA: Test previous scenario with duplicate memo groups running multiple +-- xforms that need stats before applying and reset after applying. It +-- used to be that this scenario could lead to SIGSEGV where stats were +-- reset and were not re-derived between applying the xforms. +SET optimizer_join_order=exhaustive; +SET optimizer_trace_fallback=on; +CREATE TABLE duplicate_memo_group_test_t1 (c11 varchar, c12 integer) DISTRIBUTED BY (c11); +CREATE TABLE duplicate_memo_group_test_t2 (c2 varchar) DISTRIBUTED BY (c2); +CREATE TABLE duplicate_memo_group_test_t3 (c3 varchar) DISTRIBUTED BY (c3); +INSERT INTO duplicate_memo_group_test_t1 SELECT 'something', generate_series(1,900); +INSERT INTO duplicate_memo_group_test_t2 SELECT generate_series(1,900); +ANALYZE duplicate_memo_group_test_t1, duplicate_memo_group_test_t2; +SELECT + (SELECT c11 FROM duplicate_memo_group_test_t1 WHERE c12 = 100) AS column1, + (SELECT sum(c12) +FROM duplicate_memo_group_test_t1 + INNER JOIN duplicate_memo_group_test_t2 ON c11 = c2 + INNER JOIN duplicate_memo_group_test_t3 ON c2 = c3 + INNER JOIN duplicate_memo_group_test_t3 a1 on a1.c3 = a2.c3 + LEFT OUTER JOIN duplicate_memo_group_test_t3 a3 ON a1.c3 = a3.c3 + LEFT OUTER JOIN duplicate_memo_group_test_t3 a4 ON a1.c3 = a4.c3 +) AS column2 +FROM duplicate_memo_group_test_t3 a2; + column1 | column2 +---------+--------- +(0 rows) + +RESET optimizer_join_order; +RESET optimizer_trace_fallback; diff --git a/src/test/regress/expected/bfv_statistic_optimizer.out b/src/test/regress/expected/bfv_statistic_optimizer.out index 202bc0d4ab..43e0a2de3c 100644 --- a/src/test/regress/expected/bfv_statistic_optimizer.out +++ b/src/test/regress/expected/bfv_statistic_optimizer.out @@ -465,3 +465,34 @@ where tablename = 'uniformtest'; 40-60 | <= 5 | >= 95 (1 row) +-- ORCA: Test previous scenario with duplicate memo groups running multiple +-- xforms that need stats before applying and reset after applying. It +-- used to be that this scenario could lead to SIGSEGV where stats were +-- reset and were not re-derived between applying the xforms. +SET optimizer_join_order=exhaustive; +SET optimizer_trace_fallback=on; +CREATE TABLE duplicate_memo_group_test_t1 (c11 varchar, c12 integer) DISTRIBUTED BY (c11); +CREATE TABLE duplicate_memo_group_test_t2 (c2 varchar) DISTRIBUTED BY (c2); +CREATE TABLE duplicate_memo_group_test_t3 (c3 varchar) DISTRIBUTED BY (c3); +INSERT INTO duplicate_memo_group_test_t1 SELECT 'something', generate_series(1,900); +INSERT INTO duplicate_memo_group_test_t2 SELECT generate_series(1,900); +ANALYZE duplicate_memo_group_test_t1, duplicate_memo_group_test_t2; +SELECT + (SELECT c11 FROM duplicate_memo_group_test_t1 WHERE c12 = 100) AS column1, + (SELECT sum(c12) +FROM duplicate_memo_group_test_t1 + INNER JOIN duplicate_memo_group_test_t2 ON c11 = c2 + INNER JOIN duplicate_memo_group_test_t3 ON c2 = c3 + INNER JOIN duplicate_memo_group_test_t3 a1 on a1.c3 = a2.c3 + LEFT OUTER JOIN duplicate_memo_group_test_t3 a3 ON a1.c3 = a3.c3 + LEFT OUTER JOIN duplicate_memo_group_test_t3 a4 ON a1.c3 = a4.c3 +) AS column2 +FROM duplicate_memo_group_test_t3 a2; +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: DXL-to-PlStmt Translation: Assert not supported + column1 | column2 +---------+--------- +(0 rows) + +RESET optimizer_join_order; +RESET optimizer_trace_fallback; diff --git a/src/test/regress/sql/bfv_statistic.sql b/src/test/regress/sql/bfv_statistic.sql index 2a72f46595..bac0f897ed 100644 --- a/src/test/regress/sql/bfv_statistic.sql +++ b/src/test/regress/sql/bfv_statistic.sql @@ -344,3 +344,34 @@ select case when avg(bound) between 40 and 60 then '40-60' else avg(bound)::text from pg_stats s, unnest(histogram_bounds::text::int4[]) as bound where tablename = 'uniformtest'; + +-- ORCA: Test previous scenario with duplicate memo groups running multiple +-- xforms that need stats before applying and reset after applying. It +-- used to be that this scenario could lead to SIGSEGV where stats were +-- reset and were not re-derived between applying the xforms. +SET optimizer_join_order=exhaustive; +SET optimizer_trace_fallback=on; + +CREATE TABLE duplicate_memo_group_test_t1 (c11 varchar, c12 integer) DISTRIBUTED BY (c11); +CREATE TABLE duplicate_memo_group_test_t2 (c2 varchar) DISTRIBUTED BY (c2); +CREATE TABLE duplicate_memo_group_test_t3 (c3 varchar) DISTRIBUTED BY (c3); + +INSERT INTO duplicate_memo_group_test_t1 SELECT 'something', generate_series(1,900); +INSERT INTO duplicate_memo_group_test_t2 SELECT generate_series(1,900); + +ANALYZE duplicate_memo_group_test_t1, duplicate_memo_group_test_t2; + +SELECT + (SELECT c11 FROM duplicate_memo_group_test_t1 WHERE c12 = 100) AS column1, + (SELECT sum(c12) +FROM duplicate_memo_group_test_t1 + INNER JOIN duplicate_memo_group_test_t2 ON c11 = c2 + INNER JOIN duplicate_memo_group_test_t3 ON c2 = c3 + INNER JOIN duplicate_memo_group_test_t3 a1 on a1.c3 = a2.c3 + LEFT OUTER JOIN duplicate_memo_group_test_t3 a3 ON a1.c3 = a3.c3 + LEFT OUTER JOIN duplicate_memo_group_test_t3 a4 ON a1.c3 = a4.c3 +) AS column2 +FROM duplicate_memo_group_test_t3 a2; + +RESET optimizer_join_order; +RESET optimizer_trace_fallback; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
