This is an automated email from the ASF dual-hosted git repository.

jiaqizho pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 68cdac563bc2f372719de87a70a61feedc883262
Author: Yao Wang <wa...@vmware.com>
AuthorDate: Tue Sep 20 09:53:56 2022 +0800

    FIXME remove gp_enable_sort_distinct and noduplicates optimizing (#14105)
    
    On GPDB6, the GUC gp_enable_sort_distinct is used to optimize sort by 
indicating
    "noduplicate" to remove duplicated rows and reduce data handling. It 
effects on
    replacement selection sort only. However, on GPDB7, replacement selection 
sort
    has been removed completely. So we need to remove gp_enable_sort_distinct 
and
    other codes related to "noduplicate" sort.
---
 src/backend/commands/explain.c                         |  5 +----
 src/backend/executor/nodeSort.c                        |  5 -----
 src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp | 11 +++++------
 src/backend/nodes/copyfuncs.c                          |  3 ---
 src/backend/nodes/outfuncs.c                           |  2 --
 src/backend/nodes/readfuncs.c                          |  3 ---
 src/backend/optimizer/path/allpaths.c                  |  1 -
 src/backend/optimizer/plan/createplan.c                | 10 ----------
 src/backend/utils/misc/guc_gp.c                        | 10 ----------
 src/backend/utils/misc/postgresql.conf.sample          |  1 -
 src/include/cdb/cdbvars.h                              | 10 ----------
 src/include/nodes/execnodes.h                          |  2 --
 src/include/nodes/plannodes.h                          |  2 --
 src/include/utils/unsync_guc_name.h                    |  1 -
 src/test/regress/expected/join_gp.out                  |  2 +-
 src/test/regress/expected/subselect_gp.out             | 11 +++++------
 src/test/regress/expected/tpch500GB.out                |  2 +-
 17 files changed, 13 insertions(+), 68 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index e0f20f94d4..0fe1478635 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -3012,10 +3012,7 @@ show_sort_keys(SortState *sortstate, List *ancestors, 
ExplainState *es)
        Sort       *plan = (Sort *) sortstate->ss.ps.plan;
        const char *SortKeystr;
 
-       if (sortstate->noduplicates)
-               SortKeystr = "Sort Key (Distinct)";
-       else
-               SortKeystr = "Sort Key";
+       SortKeystr = "Sort Key";
 
        show_sort_group_keys((PlanState *) sortstate, SortKeystr,
                                                 plan->numCols, 0, 
plan->sortColIdx,
diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c
index b8c7c47fd1..dcb4ccf4e2 100644
--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -248,11 +248,6 @@ ExecInitSort(Sort *node, EState *estate, int eflags)
         */
        ExecAssignExprContext(estate, &sortstate->ss.ps);
 
-       /* CDB */ /* evaluate a limit as part of the sort */
-       {
-               sortstate->noduplicates = node->noduplicates;
-       }
-
        /*
         * Miscellaneous initialization
         *
diff --git a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp 
b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp
index 090e544a04..e8ea26de50 100644
--- a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp
+++ b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp
@@ -3116,15 +3116,17 @@ CTranslatorDXLToPlStmt::TranslateDXLSort(
        const CDXLNode *sort_dxlnode, CDXLTranslateContext *output_context,
        CDXLTranslationContextArray *ctxt_translation_prev_siblings)
 {
+       // Ensure operator of sort_dxlnode exists and is EdxlopPhysicalSort
+       CDXLOperator *sort_dxlop = sort_dxlnode->GetOperator();
+       GPOS_ASSERT(nullptr != sort_dxlop);
+       GPOS_ASSERT(EdxlopPhysicalSort == sort_dxlop->GetDXLOperator());
+
        // create sort plan node
        Sort *sort = MakeNode(Sort);
 
        Plan *plan = &(sort->plan);
        plan->plan_node_id = m_dxl_to_plstmt_context->GetNextPlanId();
 
-       CDXLPhysicalSort *sort_dxlop =
-               CDXLPhysicalSort::Cast(sort_dxlnode->GetOperator());
-
        // translate operator costs
        TranslatePlanCosts(sort_dxlnode, plan);
 
@@ -3151,9 +3153,6 @@ CTranslatorDXLToPlStmt::TranslateDXLSort(
 
        plan->lefttree = child_plan;
 
-       // set sorting info
-       sort->noduplicates = sort_dxlop->FDiscardDuplicates();
-
        // translate sorting columns
 
        const CDXLNode *sort_col_list_dxl =
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 6a45828c01..50b08fb038 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -1298,9 +1298,6 @@ _copySort(const Sort *from)
         */
        CopySortFields(from, newnode);
 
-    /* CDB */
-       COPY_SCALAR_FIELD(noduplicates);
-
        return newnode;
 }
 
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 85fe1a621e..7291841a1b 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1013,8 +1013,6 @@ _outSortInfo(StringInfo str, const Sort *node)
        WRITE_OID_ARRAY(sortOperators, node->numCols);
        WRITE_OID_ARRAY(collations, node->numCols);
        WRITE_BOOL_ARRAY(nullsFirst, node->numCols);
-       /* CDB */
-    WRITE_BOOL_FIELD(noduplicates);
 }
 
 static void
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 4783e74cba..2bfdae1898 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -2388,9 +2388,6 @@ ReadSort(Sort *local_node)
        READ_TEMP_LOCALS();
 
        ReadCommonSort(local_node);
-
-       /* CDB */
-       READ_BOOL_FIELD(noduplicates);
 }
 
 /*
diff --git a/src/backend/optimizer/path/allpaths.c 
b/src/backend/optimizer/path/allpaths.c
index 19231adde3..10677763b2 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -66,7 +66,6 @@
 
 // TODO: these planner gucs need to be refactored into PlannerConfig.
 bool           gp_enable_sort_limit = false;
-bool           gp_enable_sort_distinct = false;
 
 /* results of subquery_is_pushdown_safe */
 typedef struct pushdown_safety_info
diff --git a/src/backend/optimizer/plan/createplan.c 
b/src/backend/optimizer/plan/createplan.c
index 89bd855ed6..9df1f326d0 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -7296,8 +7296,6 @@ make_sort(Plan *lefttree, int numCols,
 
        Assert(sortColIdx[0] != 0);
 
-       node->noduplicates = false; /* CDB */
-
        return node;
 }
 
@@ -8057,14 +8055,6 @@ make_unique_from_sortclauses(Plan *lefttree, List 
*distinctList)
        node->uniqOperators = uniqOperators;
        node->uniqCollations = uniqCollations;
 
-       /* CDB */       /* pass DISTINCT to sort */
-       if (IsA(lefttree, Sort) && gp_enable_sort_distinct)
-       {
-               Sort       *pSort = (Sort *) lefttree;
-
-               pSort->noduplicates = true;
-       }
-
        return node;
 }
 
diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c
index 0f7426c6ff..dd177a768a 100644
--- a/src/backend/utils/misc/guc_gp.c
+++ b/src/backend/utils/misc/guc_gp.c
@@ -800,16 +800,6 @@ struct config_bool ConfigureNamesBool_gp[] =
                NULL, NULL, NULL
        },
 
-       {
-               {"gp_enable_sort_distinct", PGC_USERSET, QUERY_TUNING_METHOD,
-                       gettext_noop("Enable duplicate removal to be performed 
while sorting."),
-                       gettext_noop("Reduces data handling when plan calls for 
removing duplicates from sorted rows.")
-               },
-               &gp_enable_sort_distinct,
-               true,
-               NULL, NULL, NULL
-       },
-
        {
                {"gp_enable_motion_deadlock_sanity", PGC_USERSET, 
DEVELOPER_OPTIONS,
                        gettext_noop("Enable verbose check at planning time."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample 
b/src/backend/utils/misc/postgresql.conf.sample
index 0908c41223..31ac67628a 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -396,7 +396,6 @@ max_prepared_transactions = 250             # can be 0 or 
more
 #gp_selectivity_damping_for_joins = off
 
 #gp_enable_sort_limit = on
-#gp_enable_sort_distinct = on
 
 # - Planner Cost Constants -
 
diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h
index 1d9c72db61..0485d2bf84 100644
--- a/src/include/cdb/cdbvars.h
+++ b/src/include/cdb/cdbvars.h
@@ -648,16 +648,6 @@ extern int explain_memory_verbosity;
  */
 extern bool gp_enable_sort_limit;
 
-/* May Cloudberry discard duplicate rows in sort if it is is wrapped by a
- * DISTINCT clause (unique aggregation operator)?
- *
- * The code does not currently use planner estimates for this.  If enabled,
- * the tactic is used whenever possible.
- *
- * GPDB_12_MERGE_FIXME: Resurrect this
- */
-extern bool gp_enable_sort_distinct;
-
 extern bool trace_sort;
 
 /**
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 44b54dc68f..a160893618 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2562,8 +2562,6 @@ typedef struct SortState
        bool            am_worker;              /* are we a worker? */
        SharedSortInfo *shared_info;    /* one entry per worker */
 
-       bool            noduplicates;   /* true if discard duplicate rows */
-
        bool            delayEagerFree;         /* is it safe to free memory 
used by this node,
                                                                         * when 
this node has outputted its last row? */
        TuplesortInstrumentation sortstats; /* holds stats, if the Sort is 
eagerly free'd */
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 773b5c27c6..45eaaa83ae 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1244,8 +1244,6 @@ typedef struct Sort
        Oid                *sortOperators;      /* OIDs of operators to sort 
them by */
        Oid                *collations;         /* OIDs of collations */
        bool       *nullsFirst;         /* NULLS FIRST/LAST directions */
-    /* CDB */
-       bool            noduplicates;   /* TRUE if sort should discard 
duplicates */
 } Sort;
 
 /* ----------------
diff --git a/src/include/utils/unsync_guc_name.h 
b/src/include/utils/unsync_guc_name.h
index 43e2bbee69..382a46b6f8 100644
--- a/src/include/utils/unsync_guc_name.h
+++ b/src/include/utils/unsync_guc_name.h
@@ -200,7 +200,6 @@
                "gp_enable_refresh_fast_path",
                "gp_enable_relsize_collection",
                "gp_enable_slow_writer_testmode",
-               "gp_enable_sort_distinct",
                "gp_enable_sort_limit",
                "gp_encoding_check_locale_compatibility",
                "gp_external_enable_exec",
diff --git a/src/test/regress/expected/join_gp.out 
b/src/test/regress/expected/join_gp.out
index 6722f5a824..3f2c8cf077 100644
--- a/src/test/regress/expected/join_gp.out
+++ b/src/test/regress/expected/join_gp.out
@@ -1519,7 +1519,7 @@ select * from foo where exists (select 1 from bar where 
foo.a = bar.b);
          ->  Unique
                Group Key: (RowIdExpr)
                ->  Sort
-                     Sort Key (Distinct): (RowIdExpr)
+                     Sort Key: (RowIdExpr)
                      ->  Redistribute Motion 3:3  (slice2; segments: 3)
                            Hash Key: (RowIdExpr)
                            ->  Hash Join
diff --git a/src/test/regress/expected/subselect_gp.out 
b/src/test/regress/expected/subselect_gp.out
index cd3c95daa7..c447bcc555 100644
--- a/src/test/regress/expected/subselect_gp.out
+++ b/src/test/regress/expected/subselect_gp.out
@@ -1916,8 +1916,7 @@ select * from dedup_reptab r where r.a in (select t.a/10 
from dedup_tab t);
                Locus: Hashed
                Group Key: (RowIdExpr)
                ->  Sort
-                     Locus: Hashed
-                     Sort Key (Distinct): (RowIdExpr)
+                     Sort Key: (RowIdExpr)
                      ->  Redistribute Motion 3:3  (slice2; segments: 3)
                            Locus: Hashed
                            Hash Key: (RowIdExpr)
@@ -1975,7 +1974,7 @@ select * from dedup_srf() r(a) where r.a in (select 
t.a/10 from dedup_tab t);
          ->  Unique
                Group Key: (RowIdExpr)
                ->  Sort
-                     Sort Key (Distinct): (RowIdExpr)
+                     Sort Key: (RowIdExpr)
                      ->  Redistribute Motion 3:3  (slice2; segments: 3)
                            Hash Key: (RowIdExpr)
                            ->  Hash Join
@@ -2004,7 +2003,7 @@ select * from dedup_srf_stable() r(a) where r.a in 
(select t.a/10 from dedup_tab
          ->  Unique
                Group Key: (RowIdExpr)
                ->  Sort
-                     Sort Key (Distinct): (RowIdExpr)
+                     Sort Key: (RowIdExpr)
                      ->  Redistribute Motion 3:3  (slice2; segments: 3)
                            Hash Key: (RowIdExpr)
                            ->  Hash Join
@@ -2033,7 +2032,7 @@ select * from dedup_srf_volatile() r(a) where r.a in 
(select t.a/10 from dedup_t
          ->  Unique
                Group Key: (RowIdExpr)
                ->  Sort
-                     Sort Key (Distinct): (RowIdExpr)
+                     Sort Key: (RowIdExpr)
                      ->  Redistribute Motion 3:3  (slice2; segments: 3)
                            Hash Key: (RowIdExpr)
                            ->  Hash Join
@@ -2113,7 +2112,7 @@ select * from dedup_func_volatile() r(a) where r.a in 
(select t.a/10 from dedup_
          ->  Unique
                Group Key: (RowIdExpr)
                ->  Sort
-                     Sort Key (Distinct): (RowIdExpr)
+                     Sort Key: (RowIdExpr)
                      ->  Redistribute Motion 3:3  (slice2; segments: 3)
                            Hash Key: (RowIdExpr)
                            ->  Hash Join
diff --git a/src/test/regress/expected/tpch500GB.out 
b/src/test/regress/expected/tpch500GB.out
index 7a36575cfb..49e9040904 100755
--- a/src/test/regress/expected/tpch500GB.out
+++ b/src/test/regress/expected/tpch500GB.out
@@ -2005,7 +2005,7 @@ order by
                                  ->  Unique  (cost=78801360.21..78983928.42 
rows=18256821 width=22)
                                        Group By: orders.ctid
                                        ->  Sort  
(cost=78801360.21..78892644.32 rows=18256821 width=22)
-                                             Sort Key (Distinct): orders.ctid
+                                             Sort Key: orders.ctid
                                              ->  Hash Join  
(cost=14835362.82..71460313.52 rows=18256821 width=22)
                                                    Hash Cond: 
lineitem.l_orderkey = orders.o_orderkey
                                                    ->  Seq Scan on lineitem  
(cost=0.00..51767512.60 rows=489638315 width=8)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@cloudberry.apache.org
For additional commands, e-mail: commits-h...@cloudberry.apache.org

Reply via email to