This is an automated email from the ASF dual-hosted git repository. jiaqizho pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 14a78014f5488d89fbc5f48a341f10c2798fa227 Author: nishant sharma <nishan...@nishants4-a01.vmware.com> AuthorDate: Wed Nov 23 20:43:20 2022 +0530 Enable direct dispatch if distribution column is of 'varchar' type and update hash function selection based on distribution policy of table ENABLE DIRECT DISPATCH ORCA did not perform direct dispatch when the distribution column was of type 'varchar' With this update this error is fixed. For direct dispatch, in the CTranslatorExprToDXLUtils::FDirectDispatchable(,,) datum and distribution column OIds are compared. Direct dispatch is performed, if they are equal. Consider the following query which failed for a table (create table t1_varchar(col1_varchar varchar, col2_int int)): explain select * from t1_varchar where col1_varchar = ācā; In this case both the Old field in IMDid class, were different, thus direct dispatch was not performed. To fix this, it is checked if a cast exist between them and if that cast is binary coercible. UPDATE HASH FUNCTION SELECTION After the code was corrected for direct dispatch, a bug surfaced out in the code due the fix. During the pipeline run, it was found that following tests in citext.sql were failed SETUP CREATE EXTENSION citext; CREATE TEMP TABLE srt ( name CITEXT); INSERT INTO srt (name) VALUES ('abb'), ('ABA'), ('ABC'), ('abd'); QUERIES SELECT LOWER(name) as aba FROM srt WHERE name = 'ABA'::text; SELECT LOWER(name) as aba FROM srt WHERE name = 'ABA'::varchar; SELECT LOWER(name) as aba FROM srt WHERE name = 'ABA'::bpchar; Before the fix for direct dispatch, these queries were running successfully as direct dispatch was not performed for them, Planner on the other hand was performing direct dispatch for them. With the update in direct dispatch concept, ORCA was trying to perform direct dispatch in these queries also, but a wrong segment number was generated by the hashing function. On debugging the code, it was found that, wrong segment number was selected due to selection of wrong hash function. On further analysis, it was found that for direct dispatch ORCA selected hash function based on the const typeoid. Planner on the other hand selected hash function based on the distribution policy of the relation.To correct this bug, the hash function selection for direct dispatch was also made on the distribution policy of the relation. Implementation Key points 1. During the DXL to planned statement stage, we extract the information of available RTEs in FROM clause 2. We perform direct dispatch based on the distribution policy, if there is only one unique RTE in FROM clause of query, else we go back to the old logic. --- .../gpopt/translate/CTranslatorDXLToPlStmt.cpp | 101 +++++-- .../gpopt/translate/CTranslatorExprToDXLUtils.h | 3 +- .../src/translate/CTranslatorExprToDXLUtils.cpp | 76 ++++- .../gpopt/translate/CTranslatorDXLToPlStmt.h | 6 +- .../regress/expected/bfv_dd_types_optimizer.out | 2 +- src/test/regress/expected/direct_dispatch.out | 322 +++++++++++++++++++++ .../regress/expected/direct_dispatch_optimizer.out | 321 ++++++++++++++++++++ src/test/regress/expected/inherit_optimizer.out | 6 +- .../expected/qp_targeted_dispatch_optimizer.out | 2 +- src/test/regress/expected/window_optimizer.out | 4 +- src/test/regress/sql/direct_dispatch.sql | 82 ++++++ 11 files changed, 891 insertions(+), 34 deletions(-) diff --git a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp index 6d9fcbc411..704a1da602 100644 --- a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp +++ b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp @@ -197,8 +197,13 @@ CTranslatorDXLToPlStmt::GetPlannedStmtFromDXL(const CDXLNode *dxlnode, // collect oids from rtable List *oids_list = NIL; + // collect unique RTE in FROM Clause + List *oids_list_unique = NIL; + ListCell *lc_rte = nullptr; + RangeTblEntry *pRTEHashFuncCal = nullptr; + ForEach(lc_rte, m_dxl_to_plstmt_context->GetRTableEntriesList()) { RangeTblEntry *pRTE = (RangeTblEntry *) lfirst(lc_rte); @@ -206,9 +211,32 @@ CTranslatorDXLToPlStmt::GetPlannedStmtFromDXL(const CDXLNode *dxlnode, if (pRTE->rtekind == RTE_RELATION) { oids_list = gpdb::LAppendOid(oids_list, pRTE->relid); + if (pRTE->inFromCl || (CMD_INSERT == m_cmd_type)) + { + // If we have only one RTE in the FROM clause, + // then we use it to extract information + // about the distribution policy, which gives info about the + // typeOid used for direct dispatch. This helps to perform + // direct dispatch based on the distribution column type + // inplace of the constant in the filter. + pRTEHashFuncCal = (RangeTblEntry *) lfirst(lc_rte); + + // collecting only unique RTE in FROM clause + oids_list_unique = + list_append_unique_oid(oids_list_unique, pRTE->relid); + } } } + if (gpdb::ListLength(oids_list_unique) > 1) + { + // If we have a scenario with multiple unique RTE + // in "from" clause, then the hash function selection + // based on distribution policy of relation will not work + // and we switch back to selection based on constant type + pRTEHashFuncCal = nullptr; + } + // assemble planned stmt PlannedStmt *planned_stmt = MakeNode(PlannedStmt); planned_stmt->planGen = PLANGEN_OPTIMIZER; @@ -246,8 +274,8 @@ CTranslatorDXLToPlStmt::GetPlannedStmtFromDXL(const CDXLNode *dxlnode, if (CMD_SELECT == m_cmd_type && nullptr != dxlnode->GetDXLDirectDispatchInfo()) { - List *direct_dispatch_segids = - TranslateDXLDirectDispatchInfo(dxlnode->GetDXLDirectDispatchInfo()); + List *direct_dispatch_segids = TranslateDXLDirectDispatchInfo( + dxlnode->GetDXLDirectDispatchInfo(), pRTEHashFuncCal); if (direct_dispatch_segids != NIL) { @@ -269,7 +297,7 @@ CTranslatorDXLToPlStmt::GetPlannedStmtFromDXL(const CDXLNode *dxlnode, CDXLPhysicalDML::Cast(dxlnode->GetOperator()); List *direct_dispatch_segids = TranslateDXLDirectDispatchInfo( - phy_dml_dxlop->GetDXLDirectDispatchInfo()); + phy_dml_dxlop->GetDXLDirectDispatchInfo(), pRTEHashFuncCal); if (direct_dispatch_segids != NIL) { topslice->directDispatch.isDirectDispatch = true; @@ -4495,7 +4523,8 @@ CTranslatorDXLToPlStmt::TranslateDXLDml( //--------------------------------------------------------------------------- List * CTranslatorDXLToPlStmt::TranslateDXLDirectDispatchInfo( - CDXLDirectDispatchInfo *dxl_direct_dispatch_info) + CDXLDirectDispatchInfo *dxl_direct_dispatch_info, + RangeTblEntry *pRTEHashFuncCal) { if (!optimizer_enable_direct_dispatch || nullptr == dxl_direct_dispatch_info) @@ -4550,14 +4579,14 @@ CTranslatorDXLToPlStmt::TranslateDXLDirectDispatchInfo( return segids_list; } - ULONG hash_code = GetDXLDatumGPDBHash(dxl_datum_array); + ULONG hash_code = GetDXLDatumGPDBHash(dxl_datum_array, pRTEHashFuncCal); for (ULONG ul = 0; ul < length; ul++) { CDXLDatumArray *dispatch_identifier_datum_array = (*dispatch_identifier_datum_arrays)[ul]; GPOS_ASSERT(0 < dispatch_identifier_datum_array->Size()); - ULONG hash_code_new = - GetDXLDatumGPDBHash(dispatch_identifier_datum_array); + ULONG hash_code_new = GetDXLDatumGPDBHash( + dispatch_identifier_datum_array, pRTEHashFuncCal); if (hash_code != hash_code_new) { @@ -4579,25 +4608,63 @@ CTranslatorDXLToPlStmt::TranslateDXLDirectDispatchInfo( // //--------------------------------------------------------------------------- ULONG -CTranslatorDXLToPlStmt::GetDXLDatumGPDBHash(CDXLDatumArray *dxl_datum_array) +CTranslatorDXLToPlStmt::GetDXLDatumGPDBHash(CDXLDatumArray *dxl_datum_array, + RangeTblEntry *pRTEHashFuncCal) { List *consts_list = NIL; Oid *hashfuncs; const ULONG length = dxl_datum_array->Size(); - hashfuncs = (Oid *) gpdb::GPDBAlloc(length * sizeof(Oid)); + if (pRTEHashFuncCal != nullptr) + { + // If we have one unique RTE in FROM clause, + // then we do direct dispatch based on the distribution policy - for (ULONG ul = 0; ul < length; ul++) + gpdb::RelationWrapper rel = gpdb::GetRelation(pRTEHashFuncCal->relid); + GPOS_ASSERT(rel); + GpPolicy *policy = rel->rd_cdbpolicy; + int policy_nattrs = policy->nattrs; + TupleDesc desc = rel->rd_att; + Oid *opclasses = policy->opclasses; + hashfuncs = (Oid *) gpdb::GPDBAlloc(policy_nattrs * sizeof(Oid)); + + for (int i = 0; i < policy_nattrs; i++) + { + AttrNumber attnum = policy->attrs[i]; + Oid typeoid = desc->attrs[attnum - 1].atttypid; + Oid opfamily; + + opfamily = gpdb::GetOpclassFamily(opclasses[i]); + hashfuncs[i] = gpdb::GetHashProcInOpfamily(opfamily, typeoid); + } + for (ULONG ul = 0; ul < length; ul++) + { + CDXLDatum *datum_dxl = (*dxl_datum_array)[ul]; + Const *const_expr = + (Const *) m_translator_dxl_to_scalar->TranslateDXLDatumToScalar( + datum_dxl); + consts_list = gpdb::LAppend(consts_list, const_expr); + } + } + else { - CDXLDatum *datum_dxl = (*dxl_datum_array)[ul]; + // If we have multiple tables in the "from" clause, + // we calculate hashfunction based on the consttype - Const *const_expr = - (Const *) m_translator_dxl_to_scalar->TranslateDXLDatumToScalar( - datum_dxl); - consts_list = gpdb::LAppend(consts_list, const_expr); - hashfuncs[ul] = m_dxl_to_plstmt_context->GetDistributionHashFuncForType( - const_expr->consttype); + hashfuncs = (Oid *) gpdb::GPDBAlloc(length * sizeof(Oid)); + for (ULONG ul = 0; ul < length; ul++) + { + CDXLDatum *datum_dxl = (*dxl_datum_array)[ul]; + + Const *const_expr = + (Const *) m_translator_dxl_to_scalar->TranslateDXLDatumToScalar( + datum_dxl); + consts_list = gpdb::LAppend(consts_list, const_expr); + hashfuncs[ul] = + m_dxl_to_plstmt_context->GetDistributionHashFuncForType( + const_expr->consttype); + } } ULONG hash = diff --git a/src/backend/gporca/libgpopt/include/gpopt/translate/CTranslatorExprToDXLUtils.h b/src/backend/gporca/libgpopt/include/gpopt/translate/CTranslatorExprToDXLUtils.h index 056823046e..9e94283069 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/translate/CTranslatorExprToDXLUtils.h +++ b/src/backend/gporca/libgpopt/include/gpopt/translate/CTranslatorExprToDXLUtils.h @@ -93,7 +93,8 @@ private: // check if the given constant value for a particular distribution column can be used // to identify which segment to direct dispatch to. - static BOOL FDirectDispatchable(const CColRef *pcrDistrCol, + static BOOL FDirectDispatchable(CMDAccessor *md_accessor, + const CColRef *pcrDistrCol, const CDXLDatum *dxl_datum); public: diff --git a/src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXLUtils.cpp b/src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXLUtils.cpp index 373769580e..92b0fe6aed 100644 --- a/src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXLUtils.cpp +++ b/src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXLUtils.cpp @@ -1311,7 +1311,8 @@ CTranslatorExprToDXLUtils::GetDXLDirectDispatchInfo( mp, md_accessor, pcrDistrCol, pcnstrDistrCol); CRefCount::SafeRelease(pcnstrDistrCol); - if (nullptr != dxl_datum && FDirectDispatchable(pcrDistrCol, dxl_datum)) + if (nullptr != dxl_datum && + FDirectDispatchable(md_accessor, pcrDistrCol, dxl_datum)) { pdrgpdxldatum->Append(dxl_datum); } @@ -1388,7 +1389,8 @@ CTranslatorExprToDXLUtils::PdxlddinfoSingleDistrKey(CMemoryPool *mp, mp, md_accessor, pcrDistrCol, pcnstrDistrCol); GPOS_ASSERT(nullptr != dxl_datum); - if (FDirectDispatchable(pcrDistrCol, dxl_datum)) + if (FDirectDispatchable(md_accessor, pcrDistrCol, dxl_datum)) + { CDXLDatumArray *pdrgpdxldatum = GPOS_NEW(mp) CDXLDatumArray(mp); @@ -1429,7 +1431,8 @@ CTranslatorExprToDXLUtils::PdxlddinfoSingleDistrKey(CMemoryPool *mp, // //--------------------------------------------------------------------------- BOOL -CTranslatorExprToDXLUtils::FDirectDispatchable(const CColRef *pcrDistrCol, +CTranslatorExprToDXLUtils::FDirectDispatchable(CMDAccessor *md_accessor, + const CColRef *pcrDistrCol, const CDXLDatum *dxl_datum) { GPOS_ASSERT(nullptr != pcrDistrCol); @@ -1445,7 +1448,66 @@ CTranslatorExprToDXLUtils::FDirectDispatchable(const CColRef *pcrDistrCol, BOOL fBothInt = CUtils::FIntType(pmdidDistrCol) && CUtils::FIntType(pmdidDatum); - return fBothInt || (pmdidDatum->Equals(pmdidDistrCol)); + if (fBothInt || (pmdidDatum->Equals(pmdidDistrCol))) + { + return true; + } + else + { + // if both the IMDId have different oids, + // then we check if a cast exist between them + // and if that cast is binary coercible. + // Eg if datum oid id 25(Text) and DistCol oid is 1043(VarChar) + // then since a cast is possible and + // cast is binary coercible, we go ahead with direct dispatch + + const IMDCast *pmdcast_datumToDistrCol; + const IMDCast *pmdcast_distrColToDatum; + + // Checking if cast exist from datum to distribution column + GPOS_TRY + { + // Pmdcast(,) generates an exception + // whenever cast is not possible. + pmdcast_datumToDistrCol = + md_accessor->Pmdcast(pmdidDatum, pmdidDistrCol); + + if ((pmdcast_datumToDistrCol->IsBinaryCoercible())) + { + // cast exist and is between coercible type + return true; + } + } + GPOS_CATCH_EX(ex) + { + GPOS_RESET_EX; + } + GPOS_CATCH_END; + + // Checking if cast exist from distribution column to datum + // eg:explain select gp_segment_id, * from t1_varchar + // where col1_varchar = 'a'::char; + GPOS_TRY + { + // Pmdcast(,) generates an exception + // whenever cast is not possible. + pmdcast_distrColToDatum = + md_accessor->Pmdcast(pmdidDistrCol, pmdidDatum); + + if ((pmdcast_distrColToDatum->IsBinaryCoercible())) + { + // cast exist and is between coercible type + return true; + } + } + GPOS_CATCH_EX(ex) + { + GPOS_RESET_EX; + } + GPOS_CATCH_END; + + return false; + } } //--------------------------------------------------------------------------- @@ -1512,7 +1574,7 @@ CTranslatorExprToDXLUtils::PdrgpdrgpdxldatumFromDisjPointConstraint( CDXLDatum *dxl_datum = PdxldatumFromPointConstraint( mp, md_accessor, pcrDistrCol, pcnstrDistrCol); - if (FDirectDispatchable(pcrDistrCol, dxl_datum)) + if (FDirectDispatchable(md_accessor, pcrDistrCol, dxl_datum)) { CDXLDatumArray *pdrgpdxldatum = GPOS_NEW(mp) CDXLDatumArray(mp); @@ -1546,7 +1608,7 @@ CTranslatorExprToDXLUtils::PdrgpdrgpdxldatumFromDisjPointConstraint( CDXLDatum *dxl_datum = CTranslatorExprToDXLUtils::GetDatumVal( mp, md_accessor, prng->PdatumLeft()); - if (!FDirectDispatchable(pcrDistrCol, dxl_datum)) + if (!FDirectDispatchable(md_accessor, pcrDistrCol, dxl_datum)) { // clean up dxl_datum->Release(); @@ -1565,7 +1627,7 @@ CTranslatorExprToDXLUtils::PdrgpdrgpdxldatumFromDisjPointConstraint( { CDXLDatum *dxl_datum = pcrDistrCol->RetrieveType()->GetDXLDatumNull(mp); - if (!FDirectDispatchable(pcrDistrCol, dxl_datum)) + if (!FDirectDispatchable(md_accessor, pcrDistrCol, dxl_datum)) { // clean up dxl_datum->Release(); diff --git a/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h b/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h index a04c259e69..4aecb8d001 100644 --- a/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h +++ b/src/include/gpopt/translate/CTranslatorDXLToPlStmt.h @@ -570,10 +570,12 @@ private: // compute directed dispatch segment ids List *TranslateDXLDirectDispatchInfo( - CDXLDirectDispatchInfo *dxl_direct_dispatch_info); + CDXLDirectDispatchInfo *dxl_direct_dispatch_info, + RangeTblEntry *pRTEHashFuncCal); // hash a DXL datum with GPDB's hash function - ULONG GetDXLDatumGPDBHash(CDXLDatumArray *dxl_datum_array); + ULONG GetDXLDatumGPDBHash(CDXLDatumArray *dxl_datum_array, + RangeTblEntry *pRTEHashFuncCal); // translate nest loop colrefs to GPDB nestparams static List *TranslateNestLoopParamList( diff --git a/src/test/regress/expected/bfv_dd_types_optimizer.out b/src/test/regress/expected/bfv_dd_types_optimizer.out index 787cf0003f..c878068601 100644 --- a/src/test/regress/expected/bfv_dd_types_optimizer.out +++ b/src/test/regress/expected/bfv_dd_types_optimizer.out @@ -170,7 +170,7 @@ INFO: (slice 1) Dispatch command to SINGLE content -- TODO: this currently not directly dispatched (AGL-1246) select * from direct_test_type_cidr where x = '68.44.55.111'; -INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 +INFO: (slice 1) Dispatch command to SINGLE content x ----------------- 68.44.55.111/32 diff --git a/src/test/regress/expected/direct_dispatch.out b/src/test/regress/expected/direct_dispatch.out index 672a472b7b..e8ccf59dfb 100644 --- a/src/test/regress/expected/direct_dispatch.out +++ b/src/test/regress/expected/direct_dispatch.out @@ -907,6 +907,328 @@ INFO: (slice 1) Dispatch command to PARTIAL contents: 1 0 1 | 1 | 1 (2 rows) +--test direct dispatch if distribution column is of varchar type +drop table if exists t1_varchar; +NOTICE: table "t1_varchar" does not exist, skipping +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 +create table t1_varchar(col1_varchar varchar, col2_int int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'col1_varchar' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +insert into t1_varchar values ('a',1); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +insert into t1_varchar values ('b',2); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +insert into t1_varchar values ('c',3); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +insert into t1_varchar values ('d',4); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +insert into t1_varchar values ('e',5); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +insert into t1_varchar values ('97',6); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'c'; + QUERY PLAN +---------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::text = 'c'::text) + Optimizer: Postgres query optimizer +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'c'; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | c | 3 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar <>'c'; + QUERY PLAN +----------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::text <> 'c'::text) + Optimizer: Postgres query optimizer +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar <>'c'; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 1 | b | 2 + 1 | e | 5 + 0 | d | 4 + 2 | a | 1 + 2 | 97 | 6 +(5 rows) + +--test direct dispatch if distribution column is of varchar type and disjunction scenario +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar in ('a','b'); + QUERY PLAN +---------------------------------------------------------------- + Gather Motion 2:1 (slice1; segments: 2) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::text = ANY ('{a,b}'::text[])) + Optimizer: Postgres query optimizer +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar in ('a','b'); +INFO: (slice 1) Dispatch command to PARTIAL contents: 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 1 | b | 2 + 2 | a | 1 +(2 rows) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'a' or col1_varchar = 'b'; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Gather Motion 2:1 (slice1; segments: 2) + -> Seq Scan on t1_varchar + Filter: (((col1_varchar)::text = 'a'::text) OR ((col1_varchar)::text = 'b'::text)) + Optimizer: Postgres query optimizer +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'a' or col1_varchar = 'b'; +INFO: (slice 1) Dispatch command to PARTIAL contents: 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 1 | b | 2 + 2 | a | 1 +(2 rows) + +--test direct dispatch if distribution column is of varchar type, having disjunction condition +-- or an additional conjunction constraint using another table column or both +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'c' and col2_int=3; + QUERY PLAN +------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: (((col1_varchar)::text = 'c'::text) AND (col2_int = 3)) + Optimizer: Postgres query optimizer +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'c' and col2_int=3; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | c | 3 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'a' and col2_int in (1,3); + QUERY PLAN +------------------------------------------------------------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: ((col2_int = ANY ('{1,3}'::integer[])) AND ((col1_varchar)::text = 'a'::text)) + Optimizer: Postgres query optimizer +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'a' and col2_int in (1,3); +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | a | 1 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'a' and col2_int not in (2,3); + QUERY PLAN +------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: ((col2_int <> ALL ('{2,3}'::integer[])) AND ((col1_varchar)::text = 'a'::text)) + Optimizer: Postgres query optimizer +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'a' and col2_int not in (2,3); +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | a | 1 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar in ('a', 'b') and col2_int=2; + QUERY PLAN +------------------------------------------------------------------------------------- + Gather Motion 2:1 (slice1; segments: 2) + -> Seq Scan on t1_varchar + Filter: (((col1_varchar)::text = ANY ('{a,b}'::text[])) AND (col2_int = 2)) + Optimizer: Postgres query optimizer +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar in ('a', 'b') and col2_int=2; +INFO: (slice 1) Dispatch command to PARTIAL contents: 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 1 | b | 2 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where (col1_varchar = 'a' or col1_varchar = 'b') and col2_int=1; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------- + Gather Motion 2:1 (slice1; segments: 2) + -> Seq Scan on t1_varchar + Filter: ((col2_int = 1) AND (((col1_varchar)::text = 'a'::text) OR ((col1_varchar)::text = 'b'::text))) + Optimizer: Postgres query optimizer +(4 rows) + +select gp_segment_id, * from t1_varchar where (col1_varchar = 'a' or col1_varchar = 'b') and col2_int=1; +INFO: (slice 1) Dispatch command to PARTIAL contents: 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | a | 1 +(1 row) + +--Test direct dispatch with explicit typecasting +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 97::VARCHAR; + QUERY PLAN +----------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::text = '97'::text) + Optimizer: Postgres query optimizer +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 97::VARCHAR; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | 97 | 6 +(1 row) + +-- explicit cast using "char", generates a scenario of cast function from Dist Colm to datum in CTranslatorExprToDXLUtils::FDirectDispatchable(,,) +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'c'::char; + QUERY PLAN +-------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::bpchar = 'c'::character(1)) + Optimizer: Postgres query optimizer +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'c'::char; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | c | 3 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = '2'::char; + QUERY PLAN +-------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::bpchar = '2'::character(1)) + Optimizer: Postgres query optimizer +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = '2'::char; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- +(0 rows) + +--No direct dispatch case, scenario: cast exists but not binary coercible +drop table if exists t3; +NOTICE: table "t3" does not exist, skipping +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 +create table t3 (c1 timestamp without time zone); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +insert into t3 values ('2015-07-03 00:00:00'::timestamp without time zone); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +explain (costs off) select c1 from t3 where c1 = '2015-07-03'::date; + QUERY PLAN +------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t3 + Filter: (c1 = '07-03-2015'::date) + Optimizer: Postgres query optimizer +(4 rows) + +select c1 from t3 where c1 = '2015-07-03'::date; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + c1 +-------------------------- + Fri Jul 03 00:00:00 2015 +(1 row) + +drop table t3; +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +drop table t1_varchar; +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +--check direct dispatch working based on the distribution policy of relation +drop extension if exists citext cascade; +NOTICE: extension "citext" does not exist, skipping +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 +drop table if exists srt_dd; +NOTICE: table "srt_dd" does not exist, skipping +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 +CREATE EXTENSION citext; +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +create table srt_dd (name CITEXT); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'name' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +INSERT INTO srt_dd (name) +VALUES ('abb'), + ('ABA'), + ('ABC'), + ('abd'); +INFO: (slice 0) Dispatch command to ALL contents: 0 1 2 +INFO: (slice 1) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +explain (costs off) select LOWER(name) as aba FROM srt_dd WHERE name = 'ABA'::text; + QUERY PLAN +---------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on srt_dd + Filter: ((name)::text = 'ABA'::text) + Optimizer: Postgres query optimizer +(4 rows) + +select LOWER(name) as aba FROM srt_dd WHERE name = 'ABA'::text; +INFO: (slice 1) Dispatch command to SINGLE content + aba +----- + aba +(1 row) + +explain (costs off) delete from srt_dd where name='ABA'::text; + QUERY PLAN +---------------------------------------------- + Delete on srt_dd + -> Seq Scan on srt_dd + Filter: ((name)::text = 'ABA'::text) + Optimizer: Postgres query optimizer +(4 rows) + +delete from srt_dd where name='ABA'::text; +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +drop extension if exists citext cascade; +NOTICE: drop cascades to table srt_dd +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +drop table if exists srt_dd; +NOTICE: table "srt_dd" does not exist, skipping +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 -- test direct dispatch via SQLValueFunction and FuncExpr for single row insertion. create table t_sql_value_function1 (a int, b date); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. diff --git a/src/test/regress/expected/direct_dispatch_optimizer.out b/src/test/regress/expected/direct_dispatch_optimizer.out index 106c590805..607aaf80f6 100644 --- a/src/test/regress/expected/direct_dispatch_optimizer.out +++ b/src/test/regress/expected/direct_dispatch_optimizer.out @@ -919,6 +919,327 @@ INFO: (slice 1) Dispatch command to PARTIAL contents: 0 1 0 | 3 | 3 (2 rows) +--test direct dispatch if distribution column is of varchar type +drop table if exists t1_varchar; +NOTICE: table "t1_varchar" does not exist, skipping +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 +create table t1_varchar(col1_varchar varchar, col2_int int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'col1_varchar' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +insert into t1_varchar values ('a',1); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +insert into t1_varchar values ('b',2); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +insert into t1_varchar values ('c',3); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +insert into t1_varchar values ('d',4); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +insert into t1_varchar values ('e',5); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +insert into t1_varchar values ('97',6); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'c'; + QUERY PLAN +---------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::text = 'c'::text) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'c'; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | c | 3 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar <>'c'; + QUERY PLAN +----------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::text <> 'c'::text) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar <>'c'; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 1 | b | 2 + 1 | e | 5 + 0 | d | 4 + 2 | a | 1 + 2 | 97 | 6 +(5 rows) + +--test direct dispatch if distribution column is of varchar type and disjunction scenario +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar in ('a','b'); + QUERY PLAN +---------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::text = ANY ('{a,b}'::text[])) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar in ('a','b'); +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 1 | b | 2 + 2 | a | 1 +(2 rows) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'a' or col1_varchar = 'b'; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1_varchar + Filter: (((col1_varchar)::text = 'a'::text) OR ((col1_varchar)::text = 'b'::text)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'a' or col1_varchar = 'b'; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 1 | b | 2 + 2 | a | 1 +(2 rows) + +--test direct dispatch if distribution column is of varchar type, having disjunction condition +-- or an additional conjunction constraint using another table column or both +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'c' and col2_int=3; + QUERY PLAN +------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: (((col1_varchar)::text = 'c'::text) AND (col2_int = 3)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'c' and col2_int=3; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | c | 3 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'a' and col2_int in (1,3); + QUERY PLAN +------------------------------------------------------------------------------------------------ + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: (((col1_varchar)::text = 'a'::text) AND (col2_int = ANY ('{1,3}'::integer[]))) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'a' and col2_int in (1,3); +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | a | 1 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'a' and col2_int not in (2,3); + QUERY PLAN +------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: (((col1_varchar)::text = 'a'::text) AND (col2_int <> ALL ('{2,3}'::integer[]))) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'a' and col2_int not in (2,3); +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | a | 1 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar in ('a', 'b') and col2_int=2; + QUERY PLAN +------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1_varchar + Filter: (((col1_varchar)::text = ANY ('{a,b}'::text[])) AND (col2_int = 2)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar in ('a', 'b') and col2_int=2; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 1 | b | 2 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where (col1_varchar = 'a' or col1_varchar = 'b') and col2_int=1; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1_varchar + Filter: ((((col1_varchar)::text = 'a'::text) OR ((col1_varchar)::text = 'b'::text)) AND (col2_int = 1)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where (col1_varchar = 'a' or col1_varchar = 'b') and col2_int=1; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | a | 1 +(1 row) + +--Test direct dispatch with explicit typecasting +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 97::VARCHAR; + QUERY PLAN +----------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::text = '97'::text) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 97::VARCHAR; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | 97 | 6 +(1 row) + +-- explicit cast using "char", generates a scenario of cast function from Dist Colm to datum in CTranslatorExprToDXLUtils::FDirectDispatchable(,,) +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'c'::char; + QUERY PLAN +-------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::bpchar = 'c'::character(1)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = 'c'::char; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- + 2 | c | 3 +(1 row) + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = '2'::char; + QUERY PLAN +-------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on t1_varchar + Filter: ((col1_varchar)::bpchar = '2'::character(1)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select gp_segment_id, * from t1_varchar where col1_varchar = '2'::char; +INFO: (slice 1) Dispatch command to SINGLE content + gp_segment_id | col1_varchar | col2_int +---------------+--------------+---------- +(0 rows) + +--No direct dispatch case, scenario: cast exists but not binary coercible +drop table if exists t3; +NOTICE: table "t3" does not exist, skipping +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 +create table t3 (c1 timestamp without time zone); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +insert into t3 values ('2015-07-03 00:00:00'::timestamp without time zone); +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +explain (costs off) select c1 from t3 where c1 = '2015-07-03'::date; + QUERY PLAN +------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t3 + Filter: (c1 = '07-03-2015'::date) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select c1 from t3 where c1 = '2015-07-03'::date; +INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 + c1 +-------------------------- + Fri Jul 03 00:00:00 2015 +(1 row) + +drop table t3; +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +drop table t1_varchar; +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +--check direct dispatch working based on the distribution policy of relation +drop extension if exists citext cascade; +NOTICE: extension "citext" does not exist, skipping +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 +drop table if exists srt_dd; +NOTICE: table "srt_dd" does not exist, skipping +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 +CREATE EXTENSION citext; +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +create table srt_dd (name CITEXT); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'name' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +INSERT INTO srt_dd (name) +VALUES ('abb'), + ('ABA'), + ('ABC'), + ('abd'); +INFO: (slice 0) Dispatch command to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +explain (costs off) select LOWER(name) as aba FROM srt_dd WHERE name = 'ABA'::text; + QUERY PLAN +---------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on srt_dd + Filter: ((name)::text = 'ABA'::text) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +select LOWER(name) as aba FROM srt_dd WHERE name = 'ABA'::text; +INFO: (slice 1) Dispatch command to SINGLE content + aba +----- + aba +(1 row) + +explain (costs off) delete from srt_dd where name='ABA'::text; + QUERY PLAN +---------------------------------------------- + Delete on srt_dd + -> Seq Scan on srt_dd srt_dd_1 + Filter: ((name)::text = 'ABA'::text) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +delete from srt_dd where name='ABA'::text; +INFO: (slice 0) Dispatch command to SINGLE content +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to SINGLE content +drop extension if exists citext cascade; +NOTICE: drop cascades to table srt_dd +INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 +INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 +drop table if exists srt_dd; +NOTICE: table "srt_dd" does not exist, skipping +INFO: Distributed transaction command 'Distributed Commit (one-phase)' to ALL contents: 0 1 2 -- test direct dispatch via SQLValueFunction and FuncExpr for single row insertion. create table t_sql_value_function1 (a int, b date); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. diff --git a/src/test/regress/expected/inherit_optimizer.out b/src/test/regress/expected/inherit_optimizer.out index a226521214..3efa7a8e25 100644 --- a/src/test/regress/expected/inherit_optimizer.out +++ b/src/test/regress/expected/inherit_optimizer.out @@ -1927,9 +1927,9 @@ explain (costs off) select * from list_parted where a = 'ab' or a in (null, 'cd' (5 rows) explain (costs off) select * from list_parted where a = 'ab'; - QUERY PLAN ------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) + QUERY PLAN +---------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) -> Dynamic Seq Scan on list_parted Number of partitions to scan: 1 Filter: ((a)::text = 'ab'::text) diff --git a/src/test/regress/expected/qp_targeted_dispatch_optimizer.out b/src/test/regress/expected/qp_targeted_dispatch_optimizer.out index 008470335d..abf4565f51 100644 --- a/src/test/regress/expected/qp_targeted_dispatch_optimizer.out +++ b/src/test/regress/expected/qp_targeted_dispatch_optimizer.out @@ -323,7 +323,7 @@ INFO: (slice 0) Dispatch command to ALL contents: 0 1 2 INFO: Distributed transaction command 'Distributed Prepare' to ALL contents: 0 1 2 INFO: Distributed transaction command 'Distributed Commit Prepared' to ALL contents: 0 1 2 select * from bytea where bytea1='d' and cidr1='0.0.0.1'; -INFO: (slice 1) Dispatch command to ALL contents: 0 1 2 +INFO: (slice 1) Dispatch command to SINGLE content bytea1 | cidr1 --------+------------ \x64 | 0.0.0.1/32 diff --git a/src/test/regress/expected/window_optimizer.out b/src/test/regress/expected/window_optimizer.out index 59c2635901..1cafcebc08 100644 --- a/src/test/regress/expected/window_optimizer.out +++ b/src/test/regress/expected/window_optimizer.out @@ -3218,7 +3218,7 @@ SELECT * FROM WHERE depname = 'sales'; QUERY PLAN --------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) + Gather Motion 1:1 (slice1; segments: 1) -> WindowAgg Partition By: (((depname)::text || 'A'::text)), depname -> Sort @@ -3297,7 +3297,7 @@ SELECT * FROM WHERE depname = 'sales'; QUERY PLAN --------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) + Gather Motion 1:1 (slice1; segments: 1) -> WindowAgg Partition By: depname, empno Order By: enroll_date diff --git a/src/test/regress/sql/direct_dispatch.sql b/src/test/regress/sql/direct_dispatch.sql index ab8bafafff..f997b30207 100644 --- a/src/test/regress/sql/direct_dispatch.sql +++ b/src/test/regress/sql/direct_dispatch.sql @@ -349,6 +349,88 @@ select gp_segment_id, * from t_test_dd_via_segid_conj where a in (1,3) and gp_se explain (costs off) select gp_segment_id, * from t_test_dd_via_segid_conj where a in (1,3) and gp_segment_id in (0,1); select gp_segment_id, * from t_test_dd_via_segid_conj where a in (1,3) and gp_segment_id in (0,1); +--test direct dispatch if distribution column is of varchar type +drop table if exists t1_varchar; +create table t1_varchar(col1_varchar varchar, col2_int int); +insert into t1_varchar values ('a',1); +insert into t1_varchar values ('b',2); +insert into t1_varchar values ('c',3); +insert into t1_varchar values ('d',4); +insert into t1_varchar values ('e',5); +insert into t1_varchar values ('97',6); + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'c'; +select gp_segment_id, * from t1_varchar where col1_varchar = 'c'; + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar <>'c'; +select gp_segment_id, * from t1_varchar where col1_varchar <>'c'; + +--test direct dispatch if distribution column is of varchar type and disjunction scenario +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar in ('a','b'); +select gp_segment_id, * from t1_varchar where col1_varchar in ('a','b'); + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'a' or col1_varchar = 'b'; +select gp_segment_id, * from t1_varchar where col1_varchar = 'a' or col1_varchar = 'b'; + +--test direct dispatch if distribution column is of varchar type, having disjunction condition +-- or an additional conjunction constraint using another table column or both +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'c' and col2_int=3; +select gp_segment_id, * from t1_varchar where col1_varchar = 'c' and col2_int=3; + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'a' and col2_int in (1,3); +select gp_segment_id, * from t1_varchar where col1_varchar = 'a' and col2_int in (1,3); + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'a' and col2_int not in (2,3); +select gp_segment_id, * from t1_varchar where col1_varchar = 'a' and col2_int not in (2,3); + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar in ('a', 'b') and col2_int=2; +select gp_segment_id, * from t1_varchar where col1_varchar in ('a', 'b') and col2_int=2; + +explain (costs off) select gp_segment_id, * from t1_varchar where (col1_varchar = 'a' or col1_varchar = 'b') and col2_int=1; +select gp_segment_id, * from t1_varchar where (col1_varchar = 'a' or col1_varchar = 'b') and col2_int=1; + +--Test direct dispatch with explicit typecasting +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 97::VARCHAR; +select gp_segment_id, * from t1_varchar where col1_varchar = 97::VARCHAR; + +-- explicit cast using "char", generates a scenario of cast function from Dist Colm to datum in CTranslatorExprToDXLUtils::FDirectDispatchable(,,) +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = 'c'::char; +select gp_segment_id, * from t1_varchar where col1_varchar = 'c'::char; + +explain (costs off) select gp_segment_id, * from t1_varchar where col1_varchar = '2'::char; +select gp_segment_id, * from t1_varchar where col1_varchar = '2'::char; + +--No direct dispatch case, scenario: cast exists but not binary coercible +drop table if exists t3; +create table t3 (c1 timestamp without time zone); +insert into t3 values ('2015-07-03 00:00:00'::timestamp without time zone); + +explain (costs off) select c1 from t3 where c1 = '2015-07-03'::date; +select c1 from t3 where c1 = '2015-07-03'::date; + +drop table t3; +drop table t1_varchar; + +--check direct dispatch working based on the distribution policy of relation +drop extension if exists citext cascade; +drop table if exists srt_dd; +CREATE EXTENSION citext; +create table srt_dd (name CITEXT); +INSERT INTO srt_dd (name) +VALUES ('abb'), + ('ABA'), + ('ABC'), + ('abd'); + +explain (costs off) select LOWER(name) as aba FROM srt_dd WHERE name = 'ABA'::text; +select LOWER(name) as aba FROM srt_dd WHERE name = 'ABA'::text; + +explain (costs off) delete from srt_dd where name='ABA'::text; +delete from srt_dd where name='ABA'::text; + +drop extension if exists citext cascade; +drop table if exists srt_dd; + -- test direct dispatch via SQLValueFunction and FuncExpr for single row insertion. create table t_sql_value_function1 (a int, b date); create table t_sql_value_function2 (a date); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cloudberry.apache.org For additional commands, e-mail: commits-h...@cloudberry.apache.org