We originally did this in 91e9e89dc, but a memory leak was discovered
as I neglected to pfree the datum which is freshly allocated in
tuplesort_getdatum. Because that was discovered late in the PG15
cycle, we opted to just disable the datum sort optimisation for byref
types in 3a5817695.

As was mentioned in [1], it looks like we could really use a version
of tuplesort_getdatum which does not palloc a new Datum.  nodeSort.c,
when calling tuplesort_gettupleslot passes copy==false, so it would
make sense if the datum sort variation didn't do any copying either.

In the attached patch, I've added a function named
tuplesort_getdatum_nocopy() which is the same as tuplesort_getdatum()
only without the datumCopy(). I opted for the new function rather than
a new parameter in the existing function just to reduce branching and
additional needless overhead.

I also looked at the tuplesort_getdatum() call inside
process_ordered_aggregate_single() and made a few changes there so we
don't needlessly perform a datumCopy() when we skip a Datum due to
finding it the same as the previous Datum in a DISTINCT aggregate
situation.

I was also looking at mode_final(). Perhaps that could do with the
same treatment, I just didn't touch it in the attached patch.

A quick performance test with:

create table t1 (a varchar(32) not null, b varchar(32) not null);
insert into t1 select md5((x%10)::text),md5((x%10)::text) from
generate_Series(1,1000000)x;
vacuum freeze t1;
create index on t1(a);

Yields a small speedup for the DISTINCT aggregate case.

work_mem = 256MB
query = select max(distinct a), max(distinct b) from t1;

Master:
latency average = 313.197 ms

Patched:
latency average = 304.335 ms (about 3% faster)

The Datum sort in nodeSort.c is more impressive.

query = select b from t1 order by b offset 1000000;

Master:
latency average = 344.763 ms

Patched:
latency average = 268.374 ms (about 28% faster)

I'll add this to the November CF

David

[1] 
https://www.postgresql.org/message-id/CAApHDvqS6wC5U==k9Hd26E4EQXH3QR67-T4=q1rq36ngvjf...@mail.gmail.com
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index fe74e49814..b5f63b5a2b 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -878,8 +878,8 @@ process_ordered_aggregate_single(AggState *aggstate,
         * pfree them when they are no longer needed.
         */
 
-       while (tuplesort_getdatum(pertrans->sortstates[aggstate->current_set],
-                                                         true, newVal, isNull, 
&newAbbrevVal))
+       while 
(tuplesort_getdatum_nocopy(pertrans->sortstates[aggstate->current_set],
+                                                                        true, 
newVal, isNull, &newAbbrevVal))
        {
                /*
                 * Clear and select the working context for evaluation of the 
equality
@@ -900,24 +900,33 @@ process_ordered_aggregate_single(AggState *aggstate,
                                                                                
         pertrans->aggCollation,
                                                                                
         oldVal, *newVal)))))
                {
-                       /* equal to prior, so forget this one */
-                       if (!pertrans->inputtypeByVal && !*isNull)
-                               pfree(DatumGetPointer(*newVal));
+                       MemoryContextSwitchTo(oldContext);
+                       continue;
                }
                else
                {
                        advance_transition_function(aggstate, pertrans, 
pergroupstate);
-                       /* forget the old value, if any */
-                       if (!oldIsNull && !pertrans->inputtypeByVal)
-                               pfree(DatumGetPointer(oldVal));
-                       /* and remember the new one for subsequent equality 
checks */
-                       oldVal = *newVal;
+
+                       MemoryContextSwitchTo(oldContext);
+
+                       /*
+                        * Forget the old value, if any and remember the new 
one for
+                        * subsequent equality checks.
+                        */
+                       if (!pertrans->inputtypeByVal)
+                       {
+                               if (!oldIsNull)
+                                       pfree(DatumGetPointer(oldVal));
+                               if (!*isNull)
+                                       oldVal = datumCopy(*newVal, 
pertrans->inputtypeByVal,
+                                                                          
pertrans->inputtypeLen);
+                       }
+                       else
+                               oldVal = *newVal;
                        oldAbbrevVal = newAbbrevVal;
                        oldIsNull = *isNull;
                        haveOldVal = true;
                }
-
-               MemoryContextSwitchTo(oldContext);
        }
 
        if (!oldIsNull && !pertrans->inputtypeByVal)
diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c
index 37ad35704b..f418be30a1 100644
--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -197,8 +197,8 @@ ExecSort(PlanState *pstate)
        if (node->datumSort)
        {
                ExecClearTuple(slot);
-               if (tuplesort_getdatum(tuplesortstate, 
ScanDirectionIsForward(dir),
-                                                          
&(slot->tts_values[0]), &(slot->tts_isnull[0]), NULL))
+               if (tuplesort_getdatum_nocopy(tuplesortstate, 
ScanDirectionIsForward(dir),
+                                                                         
&(slot->tts_values[0]), &(slot->tts_isnull[0]), NULL))
                        ExecStoreVirtualTuple(slot);
        }
        else
@@ -278,10 +278,10 @@ ExecInitSort(Sort *node, EState *estate, int eflags)
        outerTupDesc = ExecGetResultType(outerPlanState(sortstate));
 
        /*
-        * We perform a Datum sort when we're sorting just a single byval 
column,
+        * We perform a Datum sort when we're sorting just a single column,
         * otherwise we perform a tuple sort.
         */
-       if (outerTupDesc->natts == 1 && TupleDescAttr(outerTupDesc, 
0)->attbyval)
+       if (outerTupDesc->natts == 1)
                sortstate->datumSort = true;
        else
                sortstate->datumSort = false;
diff --git a/src/backend/utils/sort/tuplesortvariants.c 
b/src/backend/utils/sort/tuplesortvariants.c
index afa5bdbf04..a9d51234e6 100644
--- a/src/backend/utils/sort/tuplesortvariants.c
+++ b/src/backend/utils/sort/tuplesortvariants.c
@@ -886,6 +886,58 @@ tuplesort_getdatum(Tuplesortstate *state, bool forward,
        return true;
 }
 
+/*
+ * Fetch the next Datum in either forward or back direction.
+ * Returns false if no more datums.
+ *
+ * If the Datum is pass-by-ref type, the returned value is a pointer directly
+ * into the tuplesort's stup.tuple.  This is only safe for callers that no
+ * longer require a reference to the datum beyond any subsequent manipulation
+ * of the tuplesort's state.
+ *
+ * Caller may optionally be passed back abbreviated value (on true return
+ * value) when abbreviation was used, which can be used to cheaply avoid
+ * equality checks that might otherwise be required.  Caller can safely make a
+ * determination of "non-equal tuple" based on simple binary inequality.  A
+ * NULL value will have a zeroed abbreviated value representation, which caller
+ * may rely on in abbreviated inequality check.
+ */
+bool
+tuplesort_getdatum_nocopy(Tuplesortstate *state, bool forward,
+                                                 Datum *val, bool *isNull, 
Datum *abbrev)
+{
+       TuplesortPublic *base = TuplesortstateGetPublic(state);
+       MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
+       SortTuple       stup;
+
+       if (!tuplesort_gettuple_common(state, forward, &stup))
+       {
+               MemoryContextSwitchTo(oldcontext);
+               return false;
+       }
+
+       /* Ensure we copy into caller's memory context */
+       MemoryContextSwitchTo(oldcontext);
+
+       /* Record abbreviated key for caller */
+       if (base->sortKeys->abbrev_converter && abbrev)
+               *abbrev = stup.datum1;
+
+       if (stup.isnull1 || !base->tuples)
+       {
+               *val = stup.datum1;
+               *isNull = stup.isnull1;
+       }
+       else
+       {
+               /* use stup.tuple because stup.datum1 may be an abbreviation */
+               *val = PointerGetDatum(stup.tuple);
+               *isNull = false;
+       }
+
+       return true;
+}
+
 
 /*
  * Routines specialized for HeapTuple (actually MinimalTuple) case
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index 4441274990..fc8fc780eb 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -441,6 +441,7 @@ extern HeapTuple tuplesort_getheaptuple(Tuplesortstate 
*state, bool forward);
 extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward);
 extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward,
                                                           Datum *val, bool 
*isNull, Datum *abbrev);
-
+extern bool tuplesort_getdatum_nocopy(Tuplesortstate *state, bool forward,
+                                                                         Datum 
*val, bool *isNull, Datum *abbrev);
 
 #endif                                                 /* TUPLESORT_H */

Reply via email to