On Thu, 29 Sept 2022 at 12:07, Peter Geoghegan <p...@bowt.ie> wrote:
> Also potentially relevant: the 2017 commit fa117ee4 anticipated adding
> a "copy" argument to tuplesort_getdatum() (the same commit added such
> a "copy" argument to tuplesort_gettupleslot()). I see that that still
> hasn't happened to tuplesort_getdatum() all these years later. Might
> be a good idea to do it in the next year or two, though.
>
> If David is interested in pursuing this now then I certainly won't object.

Just while this is fresh in my head, I wrote some code to make this
happen.  My preference would be not to add the "copy" param to the
existing function and instead just add a new function to prevent
additional branching.

The attached puts back the datum sort in nodeSort.c for byref types
and adjusts process_ordered_aggregate_single() to make use of this
function.

I did a quick benchmark to see if this help DISTINCT aggregate any:

create table t1 (a varchar(32) not null, b varchar(32) not null);
insert into t1 select md5((x%10)::text),md5((x%10)::text) from
generate_Series(1,1000000)x;
vacuum freeze t1;
create index on t1(a);

With a work_mem of 256MBs I get:

query = select max(distinct a), max(distinct b) from t1;

Master:
latency average = 313.197 ms

Patched:
latency average = 304.335 ms

So not a very impressive speedup there (about 3%)

Some excerpts from perf top show:

Master:
   1.40%  postgres          [.] palloc
   1.13%  postgres          [.] tuplesort_getdatum
   0.77%  postgres          [.] datumCopy

Patched:
   0.91%  postgres          [.] tuplesort_getdatum_nocopy
   0.65%  postgres          [.] palloc

I stared for a while at the mode_final() function and thought maybe we
could use the nocopy variant there. I just didn't quite pluck up the
motivation to write any code to see if it could be made faster.

David
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index fe74e49814..b5f63b5a2b 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -878,8 +878,8 @@ process_ordered_aggregate_single(AggState *aggstate,
         * pfree them when they are no longer needed.
         */
 
-       while (tuplesort_getdatum(pertrans->sortstates[aggstate->current_set],
-                                                         true, newVal, isNull, 
&newAbbrevVal))
+       while 
(tuplesort_getdatum_nocopy(pertrans->sortstates[aggstate->current_set],
+                                                                        true, 
newVal, isNull, &newAbbrevVal))
        {
                /*
                 * Clear and select the working context for evaluation of the 
equality
@@ -900,24 +900,33 @@ process_ordered_aggregate_single(AggState *aggstate,
                                                                                
         pertrans->aggCollation,
                                                                                
         oldVal, *newVal)))))
                {
-                       /* equal to prior, so forget this one */
-                       if (!pertrans->inputtypeByVal && !*isNull)
-                               pfree(DatumGetPointer(*newVal));
+                       MemoryContextSwitchTo(oldContext);
+                       continue;
                }
                else
                {
                        advance_transition_function(aggstate, pertrans, 
pergroupstate);
-                       /* forget the old value, if any */
-                       if (!oldIsNull && !pertrans->inputtypeByVal)
-                               pfree(DatumGetPointer(oldVal));
-                       /* and remember the new one for subsequent equality 
checks */
-                       oldVal = *newVal;
+
+                       MemoryContextSwitchTo(oldContext);
+
+                       /*
+                        * Forget the old value, if any and remember the new 
one for
+                        * subsequent equality checks.
+                        */
+                       if (!pertrans->inputtypeByVal)
+                       {
+                               if (!oldIsNull)
+                                       pfree(DatumGetPointer(oldVal));
+                               if (!*isNull)
+                                       oldVal = datumCopy(*newVal, 
pertrans->inputtypeByVal,
+                                                                          
pertrans->inputtypeLen);
+                       }
+                       else
+                               oldVal = *newVal;
                        oldAbbrevVal = newAbbrevVal;
                        oldIsNull = *isNull;
                        haveOldVal = true;
                }
-
-               MemoryContextSwitchTo(oldContext);
        }
 
        if (!oldIsNull && !pertrans->inputtypeByVal)
diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c
index 37ad35704b..f418be30a1 100644
--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -197,8 +197,8 @@ ExecSort(PlanState *pstate)
        if (node->datumSort)
        {
                ExecClearTuple(slot);
-               if (tuplesort_getdatum(tuplesortstate, 
ScanDirectionIsForward(dir),
-                                                          
&(slot->tts_values[0]), &(slot->tts_isnull[0]), NULL))
+               if (tuplesort_getdatum_nocopy(tuplesortstate, 
ScanDirectionIsForward(dir),
+                                                                         
&(slot->tts_values[0]), &(slot->tts_isnull[0]), NULL))
                        ExecStoreVirtualTuple(slot);
        }
        else
@@ -278,10 +278,10 @@ ExecInitSort(Sort *node, EState *estate, int eflags)
        outerTupDesc = ExecGetResultType(outerPlanState(sortstate));
 
        /*
-        * We perform a Datum sort when we're sorting just a single byval 
column,
+        * We perform a Datum sort when we're sorting just a single column,
         * otherwise we perform a tuple sort.
         */
-       if (outerTupDesc->natts == 1 && TupleDescAttr(outerTupDesc, 
0)->attbyval)
+       if (outerTupDesc->natts == 1)
                sortstate->datumSort = true;
        else
                sortstate->datumSort = false;
diff --git a/src/backend/utils/sort/tuplesortvariants.c 
b/src/backend/utils/sort/tuplesortvariants.c
index afa5bdbf04..a9d51234e6 100644
--- a/src/backend/utils/sort/tuplesortvariants.c
+++ b/src/backend/utils/sort/tuplesortvariants.c
@@ -886,6 +886,58 @@ tuplesort_getdatum(Tuplesortstate *state, bool forward,
        return true;
 }
 
+/*
+ * Fetch the next Datum in either forward or back direction.
+ * Returns false if no more datums.
+ *
+ * If the Datum is pass-by-ref type, the returned value is a pointer directly
+ * into the tuplesort's stup.tuple.  This is only safe for callers that no
+ * longer require a reference to the datum beyond any subsequent manipulation
+ * of the tuplesort's state.
+ *
+ * Caller may optionally be passed back abbreviated value (on true return
+ * value) when abbreviation was used, which can be used to cheaply avoid
+ * equality checks that might otherwise be required.  Caller can safely make a
+ * determination of "non-equal tuple" based on simple binary inequality.  A
+ * NULL value will have a zeroed abbreviated value representation, which caller
+ * may rely on in abbreviated inequality check.
+ */
+bool
+tuplesort_getdatum_nocopy(Tuplesortstate *state, bool forward,
+                                                 Datum *val, bool *isNull, 
Datum *abbrev)
+{
+       TuplesortPublic *base = TuplesortstateGetPublic(state);
+       MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
+       SortTuple       stup;
+
+       if (!tuplesort_gettuple_common(state, forward, &stup))
+       {
+               MemoryContextSwitchTo(oldcontext);
+               return false;
+       }
+
+       /* Ensure we copy into caller's memory context */
+       MemoryContextSwitchTo(oldcontext);
+
+       /* Record abbreviated key for caller */
+       if (base->sortKeys->abbrev_converter && abbrev)
+               *abbrev = stup.datum1;
+
+       if (stup.isnull1 || !base->tuples)
+       {
+               *val = stup.datum1;
+               *isNull = stup.isnull1;
+       }
+       else
+       {
+               /* use stup.tuple because stup.datum1 may be an abbreviation */
+               *val = PointerGetDatum(stup.tuple);
+               *isNull = false;
+       }
+
+       return true;
+}
+
 
 /*
  * Routines specialized for HeapTuple (actually MinimalTuple) case
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index 4441274990..fc8fc780eb 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -441,6 +441,7 @@ extern HeapTuple tuplesort_getheaptuple(Tuplesortstate 
*state, bool forward);
 extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward);
 extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward,
                                                           Datum *val, bool 
*isNull, Datum *abbrev);
-
+extern bool tuplesort_getdatum_nocopy(Tuplesortstate *state, bool forward,
+                                                                         Datum 
*val, bool *isNull, Datum *abbrev);
 
 #endif                                                 /* TUPLESORT_H */

Reply via email to