From 6f306eb908dd76d240bd7ffe34eddfb90032ea8a Mon Sep 17 00:00:00 2001
From: ChangAo Chen <cca5507@qq.com>
Date: Sun, 30 Nov 2025 17:11:00 +0800
Subject: [PATCH v1] Build the heap more efficient in tuplesort.c

Now we build the heap by using tuplesort_heap_insert(), which has
a sift-up every call. By using tuplesort_heap_insert_unordered()
and tuplesort_heap_build() we can build the heap more efficient.
(see binaryheap.c)
---
 src/backend/utils/sort/tuplesort.c | 139 +++++++++++++++--------------
 1 file changed, 74 insertions(+), 65 deletions(-)

diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 5d4411dc33f..79c3cbe5a2d 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -465,9 +465,11 @@ static void dumptuples(Tuplesortstate *state, bool alltuples);
 static void make_bounded_heap(Tuplesortstate *state);
 static void sort_bounded_heap(Tuplesortstate *state);
 static void tuplesort_sort_memtuples(Tuplesortstate *state);
-static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple);
+static void tuplesort_heap_build(Tuplesortstate *state);
+static void tuplesort_heap_insert_unordered(Tuplesortstate *state, SortTuple *tuple);
 static void tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple);
 static void tuplesort_heap_delete_top(Tuplesortstate *state);
+static void tuplesort_heap_sift_down(Tuplesortstate *state, int offset);
 static void reversedirection(Tuplesortstate *state);
 static unsigned int getlen(LogicalTape *tape, bool eofOK);
 static void markrunend(LogicalTape *tape);
@@ -2270,9 +2272,12 @@ beginmerge(Tuplesortstate *state)
 		if (mergereadnext(state, state->inputTapes[srcTapeIndex], &tup))
 		{
 			tup.srctape = srcTapeIndex;
-			tuplesort_heap_insert(state, &tup);
+			tuplesort_heap_insert_unordered(state, &tup);
+			CHECK_FOR_INTERRUPTS();
 		}
 	}
+
+	tuplesort_heap_build(state);
 }
 
 /*
@@ -2593,32 +2598,25 @@ make_bounded_heap(Tuplesortstate *state)
 	/* Reverse sort direction so largest entry will be at root */
 	reversedirection(state);
 
-	state->memtupcount = 0;		/* make the heap empty */
-	for (i = 0; i < tupcount; i++)
+	/* Build the initial heap from the first 'bound' tuples */
+	state->memtupcount = state->bound;
+	tuplesort_heap_build(state);
+
+	/* Now process the remaining tuples */
+	for (i = state->bound; i < tupcount; i++)
 	{
-		if (state->memtupcount < state->bound)
+		/*
+		 * The heap is full.  Replace the largest entry with the new
+		 * tuple, or just discard it, if it's larger than anything already
+		 * in the heap.
+		 */
+		if (COMPARETUP(state, &state->memtuples[i], &state->memtuples[0]) <= 0)
 		{
-			/* Insert next tuple into heap */
-			/* Must copy source tuple to avoid possible overwrite */
-			SortTuple	stup = state->memtuples[i];
-
-			tuplesort_heap_insert(state, &stup);
+			free_sort_tuple(state, &state->memtuples[i]);
+			CHECK_FOR_INTERRUPTS();
 		}
 		else
-		{
-			/*
-			 * The heap is full.  Replace the largest entry with the new
-			 * tuple, or just discard it, if it's larger than anything already
-			 * in the heap.
-			 */
-			if (COMPARETUP(state, &state->memtuples[i], &state->memtuples[0]) <= 0)
-			{
-				free_sort_tuple(state, &state->memtuples[i]);
-				CHECK_FOR_INTERRUPTS();
-			}
-			else
-				tuplesort_heap_replace_top(state, &state->memtuples[i]);
-		}
+			tuplesort_heap_replace_top(state, &state->memtuples[i]);
 	}
 
 	Assert(state->memtupcount == state->bound);
@@ -2721,45 +2719,37 @@ tuplesort_sort_memtuples(Tuplesortstate *state)
 }
 
 /*
- * Insert a new tuple into an empty or existing heap, maintaining the
- * heap invariant.  Caller is responsible for ensuring there's room.
- *
- * Note: For some callers, tuple points to a memtuples[] entry above the
- * end of the heap.  This is safe as long as it's not immediately adjacent
- * to the end of the heap (ie, in the [memtupcount] array entry) --- if it
- * is, it might get overwritten before being moved into the heap!
+ * Assembles a valid heap from the tuples added by
+ * tuplesort_heap_insert_unordered().
  */
 static void
-tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple)
+tuplesort_heap_build(Tuplesortstate *state)
 {
-	SortTuple  *memtuples;
-	int			j;
-
-	memtuples = state->memtuples;
-	Assert(state->memtupcount < state->memtupsize);
+	int			i;
 
-	CHECK_FOR_INTERRUPTS();
+	if (state->memtupcount < 2)
+		return;
 
-	/*
-	 * Sift-up the new entry, per Knuth 5.2.3 exercise 16. Note that Knuth is
-	 * using 1-based array indexes, not 0-based.
-	 */
-	j = state->memtupcount++;
-	while (j > 0)
-	{
-		int			i = (j - 1) >> 1;
+	for (i = (state->memtupcount - 2) / 2; i >= 0; i--)
+		tuplesort_heap_sift_down(state, i);
+}
 
-		if (COMPARETUP(state, tuple, &memtuples[i]) >= 0)
-			break;
-		memtuples[j] = memtuples[i];
-		j = i;
-	}
-	memtuples[j] = *tuple;
+/*
+ * Insert a new tuple into the end of the heap, without maintaining the
+ * heap invariant.  Caller is responsible for ensuring there's room.
+ *
+ * To obtain a valid heap, one must call tuplesort_heap_build() afterwards.
+ */
+static void
+tuplesort_heap_insert_unordered(Tuplesortstate *state, SortTuple *tuple)
+{
+	Assert(state->memtupcount < state->memtupsize);
+	state->memtuples[state->memtupcount++] = *tuple;
 }
 
 /*
  * Remove the tuple at state->memtuples[0] from the heap.  Decrement
- * memtupcount, and sift up to maintain the heap invariant.
+ * memtupcount, and sift down to maintain the heap invariant.
  *
  * The caller has already free'd the tuple the top node points to,
  * if necessary.
@@ -2774,28 +2764,46 @@ tuplesort_heap_delete_top(Tuplesortstate *state)
 		return;
 
 	/*
-	 * Remove the last tuple in the heap, and re-insert it, by replacing the
-	 * current top node with it.
+	 * Placing the last tuple in the top, and sift the new top
+	 * down to its correct position.
 	 */
-	tuple = &memtuples[state->memtupcount];
-	tuplesort_heap_replace_top(state, tuple);
+	memtuples[0] = memtuples[state->memtupcount];
+
+	if (state->memtupcount > 1)
+		tuplesort_heap_sift_down(state, 0);
 }
 
 /*
- * Replace the tuple at state->memtuples[0] with a new tuple.  Sift up to
+ * Replace the tuple at state->memtuples[0] with a new tuple.  Sift down to
  * maintain the heap invariant.
  *
- * This corresponds to Knuth's "sift-up" algorithm (Algorithm 5.2.3H,
+ * This corresponds to Knuth's "sift-down" algorithm (Algorithm 5.2.3H,
  * Heapsort, steps H3-H8).
  */
 static void
 tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple)
 {
-	SortTuple  *memtuples = state->memtuples;
+	Assert(state->memtupcount >= 1);
+
+	state->memtuples[0] = *tuple;
+
+	if (state->memtupcount > 1)
+		tuplesort_heap_sift_down(state, 0);
+}
+
+/*
+ * Sift state->memtuples[offset] down from its current position to
+ * maintain the heap invariant.
+ */
+static void
+tuplesort_heap_sift_down(Tuplesortstate *state, int offset)
+{
+	SortTuple   *memtuples = state->memtuples;
+	SortTuple    tuple;
 	unsigned int i,
-				n;
+				 n;
 
-	Assert(state->memtupcount >= 1);
+	Assert(offset >= 0 && offset < state->memtupcount);
 
 	CHECK_FOR_INTERRUPTS();
 
@@ -2804,8 +2812,9 @@ tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple)
 	 * This prevents overflow in the "2 * i + 1" calculation, since at the top
 	 * of the loop we must have i < n <= INT_MAX <= UINT_MAX/2.
 	 */
+	tuple = memtuples[offset];
 	n = state->memtupcount;
-	i = 0;						/* i is where the "hole" is */
+	i = offset;					/* i is where the "hole" is */
 	for (;;)
 	{
 		unsigned int j = 2 * i + 1;
@@ -2815,12 +2824,12 @@ tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple)
 		if (j + 1 < n &&
 			COMPARETUP(state, &memtuples[j], &memtuples[j + 1]) > 0)
 			j++;
-		if (COMPARETUP(state, tuple, &memtuples[j]) <= 0)
+		if (COMPARETUP(state, &tuple, &memtuples[j]) <= 0)
 			break;
 		memtuples[i] = memtuples[j];
 		i = j;
 	}
-	memtuples[i] = *tuple;
+	memtuples[i] = tuple;
 }
 
 /*
-- 
2.52.0

