This is an automated email from the ASF dual-hosted git repository. jiaqizho pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 68cdac563bc2f372719de87a70a61feedc883262 Author: Yao Wang <wa...@vmware.com> AuthorDate: Tue Sep 20 09:53:56 2022 +0800 FIXME remove gp_enable_sort_distinct and noduplicates optimizing (#14105) On GPDB6, the GUC gp_enable_sort_distinct is used to optimize sort by indicating "noduplicate" to remove duplicated rows and reduce data handling. It effects on replacement selection sort only. However, on GPDB7, replacement selection sort has been removed completely. So we need to remove gp_enable_sort_distinct and other codes related to "noduplicate" sort. --- src/backend/commands/explain.c | 5 +---- src/backend/executor/nodeSort.c | 5 ----- src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp | 11 +++++------ src/backend/nodes/copyfuncs.c | 3 --- src/backend/nodes/outfuncs.c | 2 -- src/backend/nodes/readfuncs.c | 3 --- src/backend/optimizer/path/allpaths.c | 1 - src/backend/optimizer/plan/createplan.c | 10 ---------- src/backend/utils/misc/guc_gp.c | 10 ---------- src/backend/utils/misc/postgresql.conf.sample | 1 - src/include/cdb/cdbvars.h | 10 ---------- src/include/nodes/execnodes.h | 2 -- src/include/nodes/plannodes.h | 2 -- src/include/utils/unsync_guc_name.h | 1 - src/test/regress/expected/join_gp.out | 2 +- src/test/regress/expected/subselect_gp.out | 11 +++++------ src/test/regress/expected/tpch500GB.out | 2 +- 17 files changed, 13 insertions(+), 68 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index e0f20f94d4..0fe1478635 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -3012,10 +3012,7 @@ show_sort_keys(SortState *sortstate, List *ancestors, ExplainState *es) Sort *plan = (Sort *) sortstate->ss.ps.plan; const char *SortKeystr; - if (sortstate->noduplicates) - SortKeystr = "Sort Key (Distinct)"; - else - SortKeystr = "Sort Key"; + SortKeystr = "Sort Key"; show_sort_group_keys((PlanState *) sortstate, SortKeystr, plan->numCols, 0, plan->sortColIdx, diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c index b8c7c47fd1..dcb4ccf4e2 100644 --- a/src/backend/executor/nodeSort.c +++ b/src/backend/executor/nodeSort.c @@ -248,11 +248,6 @@ ExecInitSort(Sort *node, EState *estate, int eflags) */ ExecAssignExprContext(estate, &sortstate->ss.ps); - /* CDB */ /* evaluate a limit as part of the sort */ - { - sortstate->noduplicates = node->noduplicates; - } - /* * Miscellaneous initialization * diff --git a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp index 090e544a04..e8ea26de50 100644 --- a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp +++ b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp @@ -3116,15 +3116,17 @@ CTranslatorDXLToPlStmt::TranslateDXLSort( const CDXLNode *sort_dxlnode, CDXLTranslateContext *output_context, CDXLTranslationContextArray *ctxt_translation_prev_siblings) { + // Ensure operator of sort_dxlnode exists and is EdxlopPhysicalSort + CDXLOperator *sort_dxlop = sort_dxlnode->GetOperator(); + GPOS_ASSERT(nullptr != sort_dxlop); + GPOS_ASSERT(EdxlopPhysicalSort == sort_dxlop->GetDXLOperator()); + // create sort plan node Sort *sort = MakeNode(Sort); Plan *plan = &(sort->plan); plan->plan_node_id = m_dxl_to_plstmt_context->GetNextPlanId(); - CDXLPhysicalSort *sort_dxlop = - CDXLPhysicalSort::Cast(sort_dxlnode->GetOperator()); - // translate operator costs TranslatePlanCosts(sort_dxlnode, plan); @@ -3151,9 +3153,6 @@ CTranslatorDXLToPlStmt::TranslateDXLSort( plan->lefttree = child_plan; - // set sorting info - sort->noduplicates = sort_dxlop->FDiscardDuplicates(); - // translate sorting columns const CDXLNode *sort_col_list_dxl = diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 6a45828c01..50b08fb038 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1298,9 +1298,6 @@ _copySort(const Sort *from) */ CopySortFields(from, newnode); - /* CDB */ - COPY_SCALAR_FIELD(noduplicates); - return newnode; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 85fe1a621e..7291841a1b 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1013,8 +1013,6 @@ _outSortInfo(StringInfo str, const Sort *node) WRITE_OID_ARRAY(sortOperators, node->numCols); WRITE_OID_ARRAY(collations, node->numCols); WRITE_BOOL_ARRAY(nullsFirst, node->numCols); - /* CDB */ - WRITE_BOOL_FIELD(noduplicates); } static void diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 4783e74cba..2bfdae1898 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -2388,9 +2388,6 @@ ReadSort(Sort *local_node) READ_TEMP_LOCALS(); ReadCommonSort(local_node); - - /* CDB */ - READ_BOOL_FIELD(noduplicates); } /* diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 19231adde3..10677763b2 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -66,7 +66,6 @@ // TODO: these planner gucs need to be refactored into PlannerConfig. bool gp_enable_sort_limit = false; -bool gp_enable_sort_distinct = false; /* results of subquery_is_pushdown_safe */ typedef struct pushdown_safety_info diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 89bd855ed6..9df1f326d0 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -7296,8 +7296,6 @@ make_sort(Plan *lefttree, int numCols, Assert(sortColIdx[0] != 0); - node->noduplicates = false; /* CDB */ - return node; } @@ -8057,14 +8055,6 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) node->uniqOperators = uniqOperators; node->uniqCollations = uniqCollations; - /* CDB */ /* pass DISTINCT to sort */ - if (IsA(lefttree, Sort) && gp_enable_sort_distinct) - { - Sort *pSort = (Sort *) lefttree; - - pSort->noduplicates = true; - } - return node; } diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index 0f7426c6ff..dd177a768a 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -800,16 +800,6 @@ struct config_bool ConfigureNamesBool_gp[] = NULL, NULL, NULL }, - { - {"gp_enable_sort_distinct", PGC_USERSET, QUERY_TUNING_METHOD, - gettext_noop("Enable duplicate removal to be performed while sorting."), - gettext_noop("Reduces data handling when plan calls for removing duplicates from sorted rows.") - }, - &gp_enable_sort_distinct, - true, - NULL, NULL, NULL - }, - { {"gp_enable_motion_deadlock_sanity", PGC_USERSET, DEVELOPER_OPTIONS, gettext_noop("Enable verbose check at planning time."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 0908c41223..31ac67628a 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -396,7 +396,6 @@ max_prepared_transactions = 250 # can be 0 or more #gp_selectivity_damping_for_joins = off #gp_enable_sort_limit = on -#gp_enable_sort_distinct = on # - Planner Cost Constants - diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h index 1d9c72db61..0485d2bf84 100644 --- a/src/include/cdb/cdbvars.h +++ b/src/include/cdb/cdbvars.h @@ -648,16 +648,6 @@ extern int explain_memory_verbosity; */ extern bool gp_enable_sort_limit; -/* May Cloudberry discard duplicate rows in sort if it is is wrapped by a - * DISTINCT clause (unique aggregation operator)? - * - * The code does not currently use planner estimates for this. If enabled, - * the tactic is used whenever possible. - * - * GPDB_12_MERGE_FIXME: Resurrect this - */ -extern bool gp_enable_sort_distinct; - extern bool trace_sort; /** diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 44b54dc68f..a160893618 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -2562,8 +2562,6 @@ typedef struct SortState bool am_worker; /* are we a worker? */ SharedSortInfo *shared_info; /* one entry per worker */ - bool noduplicates; /* true if discard duplicate rows */ - bool delayEagerFree; /* is it safe to free memory used by this node, * when this node has outputted its last row? */ TuplesortInstrumentation sortstats; /* holds stats, if the Sort is eagerly free'd */ diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 773b5c27c6..45eaaa83ae 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -1244,8 +1244,6 @@ typedef struct Sort Oid *sortOperators; /* OIDs of operators to sort them by */ Oid *collations; /* OIDs of collations */ bool *nullsFirst; /* NULLS FIRST/LAST directions */ - /* CDB */ - bool noduplicates; /* TRUE if sort should discard duplicates */ } Sort; /* ---------------- diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index 43e2bbee69..382a46b6f8 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -200,7 +200,6 @@ "gp_enable_refresh_fast_path", "gp_enable_relsize_collection", "gp_enable_slow_writer_testmode", - "gp_enable_sort_distinct", "gp_enable_sort_limit", "gp_encoding_check_locale_compatibility", "gp_external_enable_exec", diff --git a/src/test/regress/expected/join_gp.out b/src/test/regress/expected/join_gp.out index 6722f5a824..3f2c8cf077 100644 --- a/src/test/regress/expected/join_gp.out +++ b/src/test/regress/expected/join_gp.out @@ -1519,7 +1519,7 @@ select * from foo where exists (select 1 from bar where foo.a = bar.b); -> Unique Group Key: (RowIdExpr) -> Sort - Sort Key (Distinct): (RowIdExpr) + Sort Key: (RowIdExpr) -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: (RowIdExpr) -> Hash Join diff --git a/src/test/regress/expected/subselect_gp.out b/src/test/regress/expected/subselect_gp.out index cd3c95daa7..c447bcc555 100644 --- a/src/test/regress/expected/subselect_gp.out +++ b/src/test/regress/expected/subselect_gp.out @@ -1916,8 +1916,7 @@ select * from dedup_reptab r where r.a in (select t.a/10 from dedup_tab t); Locus: Hashed Group Key: (RowIdExpr) -> Sort - Locus: Hashed - Sort Key (Distinct): (RowIdExpr) + Sort Key: (RowIdExpr) -> Redistribute Motion 3:3 (slice2; segments: 3) Locus: Hashed Hash Key: (RowIdExpr) @@ -1975,7 +1974,7 @@ select * from dedup_srf() r(a) where r.a in (select t.a/10 from dedup_tab t); -> Unique Group Key: (RowIdExpr) -> Sort - Sort Key (Distinct): (RowIdExpr) + Sort Key: (RowIdExpr) -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: (RowIdExpr) -> Hash Join @@ -2004,7 +2003,7 @@ select * from dedup_srf_stable() r(a) where r.a in (select t.a/10 from dedup_tab -> Unique Group Key: (RowIdExpr) -> Sort - Sort Key (Distinct): (RowIdExpr) + Sort Key: (RowIdExpr) -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: (RowIdExpr) -> Hash Join @@ -2033,7 +2032,7 @@ select * from dedup_srf_volatile() r(a) where r.a in (select t.a/10 from dedup_t -> Unique Group Key: (RowIdExpr) -> Sort - Sort Key (Distinct): (RowIdExpr) + Sort Key: (RowIdExpr) -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: (RowIdExpr) -> Hash Join @@ -2113,7 +2112,7 @@ select * from dedup_func_volatile() r(a) where r.a in (select t.a/10 from dedup_ -> Unique Group Key: (RowIdExpr) -> Sort - Sort Key (Distinct): (RowIdExpr) + Sort Key: (RowIdExpr) -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: (RowIdExpr) -> Hash Join diff --git a/src/test/regress/expected/tpch500GB.out b/src/test/regress/expected/tpch500GB.out index 7a36575cfb..49e9040904 100755 --- a/src/test/regress/expected/tpch500GB.out +++ b/src/test/regress/expected/tpch500GB.out @@ -2005,7 +2005,7 @@ order by -> Unique (cost=78801360.21..78983928.42 rows=18256821 width=22) Group By: orders.ctid -> Sort (cost=78801360.21..78892644.32 rows=18256821 width=22) - Sort Key (Distinct): orders.ctid + Sort Key: orders.ctid -> Hash Join (cost=14835362.82..71460313.52 rows=18256821 width=22) Hash Cond: lineitem.l_orderkey = orders.o_orderkey -> Seq Scan on lineitem (cost=0.00..51767512.60 rows=489638315 width=8) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cloudberry.apache.org For additional commands, e-mail: commits-h...@cloudberry.apache.org