From 2981406dbf5bb738273b3ff4c26854ce7d715dd1 Mon Sep 17 00:00:00 2001
From: ChangAo Chen <cca5507@qq.com>
Date: Wed, 3 Dec 2025 18:58:28 +0800
Subject: [PATCH v1] Support loser tree for k-way merge.

A tree of losers refills with about ceil(log2(k)) comparisons per
output tuple, roughly half the cost of a binary heap's sift-down, so
it usually performs fewer comparisons than the heap during a k-way
merge (cf. Knuth, TAOCP vol. 3, sec. 5.4.1).
---
 src/backend/utils/misc/guc_parameters.dat |   7 +
 src/backend/utils/sort/tuplesort.c        | 160 +++++++++++++++++++++-
 src/include/utils/guc.h                   |   1 +
 3 files changed, 167 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat
index 3b9d8349078..123ece8ea75 100644
--- a/src/backend/utils/misc/guc_parameters.dat
+++ b/src/backend/utils/misc/guc_parameters.dat
@@ -882,6 +882,13 @@
   boot_val => 'true',
 },
 
+{ name => 'enable_loser_tree', type => 'bool', context => 'PGC_USERSET', group => 'QUERY_TUNING_METHOD',
+  short_desc => 'Enables loser tree for k-way merge.',
+  flags => 'GUC_NOT_IN_SAMPLE',
+  variable => 'enable_loser_tree',
+  boot_val => 'false',
+},
+
 { name => 'enable_material', type => 'bool', context => 'PGC_USERSET', group => 'QUERY_TUNING_METHOD',
   short_desc => 'Enables the planner\'s use of materialization.',
   flags => 'GUC_EXPLAIN',
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 5d4411dc33f..381f5909094 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -122,6 +122,9 @@
 
 /* GUC variables */
 bool		trace_sort = false;
+bool		enable_loser_tree = false;
+
+#define LOSER_TREE_EOF (-1)	/* leaf marker: that input run is exhausted */
 
 #ifdef DEBUG_BOUNDED_SORT
 bool		optimize_bounded_sort = true;
@@ -219,6 +222,9 @@ struct Tuplesortstate
 	int			memtupsize;		/* allocated length of memtuples array */
 	bool		growmemtuples;	/* memtuples' growth still underway? */
 
+	bool		useLoserTree;	/* use loser tree for k-way merge if true */
+	int		   *losers;			/* loser tree; losers[0] = winner leaf index */
+
 	/*
 	 * Memory for tuples is sometimes allocated using a simple slab allocator,
 	 * rather than with palloc().  Currently, we switch to slab allocation
@@ -468,6 +474,8 @@ static void tuplesort_sort_memtuples(Tuplesortstate *state);
 static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple);
 static void tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple);
 static void tuplesort_heap_delete_top(Tuplesortstate *state);
+static void tuplesort_loser_tree_build(Tuplesortstate *state);
+static void tuplesort_loser_tree_adjust(Tuplesortstate *state, int start);
 static void reversedirection(Tuplesortstate *state);
 static unsigned int getlen(LogicalTape *tape, bool eofOK);
 static void markrunend(LogicalTape *tape);
@@ -681,6 +689,8 @@ tuplesort_begin_common(int workMem, SortCoordinate coordinate, int sortopt)
 	state->base.sortopt = sortopt;
 	state->base.tuples = true;
 	state->abbrevNext = 10;
+	state->useLoserTree = enable_loser_tree;
+	state->losers = NULL;
 
 	/*
 	 * workMem is forced to be at least 64KB, the current minimum valid value
@@ -1647,6 +1657,37 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
 				state->lastReturnedTuple = NULL;
 			}
 
+			if (state->useLoserTree && state->memtupcount > 0)
+			{
+				int          i = state->losers[0];
+				int          start = i + state->memtupcount;
+				int			 srcTapeIndex = state->memtuples[i].srctape;
+				LogicalTape *srcTape = state->inputTapes[srcTapeIndex];
+				SortTuple	 newtup;
+
+				*stup = state->memtuples[i];
+
+				state->lastReturnedTuple = stup->tuple;
+
+				if (mergereadnext(state, srcTape, &newtup))
+				{
+					newtup.srctape = srcTapeIndex;
+					state->memtuples[i] = newtup;
+				}
+				else
+				{
+					state->losers[start] = LOSER_TREE_EOF;
+					state->nInputRuns--;
+					LogicalTapeClose(srcTape);
+				}
+				tuplesort_loser_tree_adjust(state, start);
+
+				if (state->losers[0] == LOSER_TREE_EOF)
+					state->memtupcount = 0;
+
+				return true;
+			}
+
 			/*
 			 * This code should match the inner loop of mergeonerun().
 			 */
@@ -2206,6 +2247,41 @@ mergeonerun(Tuplesortstate *state)
 
 	Assert(state->slabAllocatorUsed);
 
+	if (state->useLoserTree && state->memtupcount > 0)
+	{
+		while (state->losers[0] != LOSER_TREE_EOF)
+		{
+			int         i = state->losers[0];
+			int         start = i + state->memtupcount;
+			SortTuple	stup;
+
+			/* write the tuple to destTape */
+			srcTapeIndex = state->memtuples[i].srctape;
+			srcTape = state->inputTapes[srcTapeIndex];
+			WRITETUP(state, state->destTape, &state->memtuples[i]);
+
+			/* recycle the slot of the tuple we just wrote out, for the next read */
+			if (state->memtuples[i].tuple)
+				RELEASE_SLAB_SLOT(state, state->memtuples[i].tuple);
+
+			if (mergereadnext(state, srcTape, &stup))
+			{
+				stup.srctape = srcTapeIndex;
+				state->memtuples[i] = stup;
+			}
+			else
+			{
+				state->losers[start] = LOSER_TREE_EOF;
+				state->nInputRuns--;
+			}
+
+			tuplesort_loser_tree_adjust(state, start);
+		}
+		state->memtupcount = 0;
+		markrunend(state->destTape);
+		return;
+	}
+
 	/*
 	 * Execute merge by repeatedly extracting lowest tuple in heap, writing it
 	 * out, and replacing it with next tuple from same tape (if there is
@@ -2270,9 +2346,16 @@ beginmerge(Tuplesortstate *state)
 		if (mergereadnext(state, state->inputTapes[srcTapeIndex], &tup))
 		{
 			tup.srctape = srcTapeIndex;
-			tuplesort_heap_insert(state, &tup);
+
+			if (state->useLoserTree)
+				state->memtuples[state->memtupcount++] = tup;
+			else
+				tuplesort_heap_insert(state, &tup);
 		}
 	}
+
+	if (state->useLoserTree && state->memtupcount > 0)
+		tuplesort_loser_tree_build(state);
 }
 
 /*
@@ -2823,6 +2906,107 @@ tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple)
 	memtuples[i] = *tuple;
 }
 
+/*
+ * tuplesort_loser_tree_build
+ *
+ * Build the tree of losers over the first memtupcount entries of
+ * memtuples[], each of which must hold a live tuple from a distinct run.
+ *
+ * Layout of state->losers[] (1-based implicit binary tree of size 2k):
+ * indexes [k, 2k) are the leaves, one per input run; indexes [1, k) hold
+ * the loser of the match played at that internal node; losers[0] caches
+ * the leaf index of the overall winner (the next tuple in sort order).
+ */
+static void
+tuplesort_loser_tree_build(Tuplesortstate *state)
+{
+	int			k = state->memtupcount; /* merge order (number of runs) */
+	int			winner[MAXORDER * 2];
+	int			i;
+
+	Assert(k > 0 && k <= MAXORDER);
+
+	/* Lazily allocate losers[] at maximum size; reused across merges */
+	if (state->losers == NULL)
+		state->losers = (int *) MemoryContextAlloc(state->base.maincontext,
+												   MAXORDER * 2 * sizeof(int));
+
+	/* Each leaf initially "wins" its own (trivial) subtree */
+	for (i = k; i < 2 * k; i++)
+	{
+		winner[i] = i - k;
+		state->losers[i] = i - k;
+	}
+
+	/*
+	 * Play matches bottom-up.  Both children's winners are already known
+	 * when a parent is processed, since children have larger indexes.
+	 */
+	for (i = k - 1; i >= 1; i--)
+	{
+		int			l = i * 2;
+		int			r = l + 1;
+
+		if (COMPARETUP(state,
+					   &state->memtuples[winner[l]],
+					   &state->memtuples[winner[r]]) < 0)
+		{
+			winner[i] = winner[l];
+			state->losers[i] = winner[r];
+		}
+		else
+		{
+			winner[i] = winner[r];
+			state->losers[i] = winner[l];
+		}
+	}
+
+	/* Cache the overall winner's leaf index in losers[0] */
+	state->losers[0] = winner[1];
+}
+
+/*
+ * tuplesort_loser_tree_adjust
+ *
+ * After the tuple at leaf "start" has been replaced (or the leaf marked
+ * LOSER_TREE_EOF), replay the matches on the path from that leaf to the
+ * root, and store the new overall winner in losers[0].  This needs only
+ * one comparison per level, which is the loser tree's advantage over the
+ * binary heap's sift-down.
+ */
+static void
+tuplesort_loser_tree_adjust(Tuplesortstate *state, int start)
+{
+	int			winner;
+	int			parent;
+
+	Assert(state->losers != NULL);
+	Assert(start >= state->memtupcount && start < state->memtupcount * 2);
+
+	winner = state->losers[start];
+
+	for (parent = start / 2; parent > 0; parent /= 2)
+	{
+		int			loser = state->losers[parent];
+
+		/* an exhausted run (EOF) always loses; current winner advances */
+		if (loser == LOSER_TREE_EOF)
+			continue;
+
+		if (winner == LOSER_TREE_EOF ||
+			COMPARETUP(state,
+					   &state->memtuples[winner],
+					   &state->memtuples[loser]) > 0)
+		{
+			/* stored loser beats the incoming tuple; swap their roles */
+			state->losers[parent] = winner;
+			winner = loser;
+		}
+	}
+
+	/* cache the new overall winner's leaf index in losers[0] */
+	state->losers[0] = winner;
+}
+
 /*
  * Function to reverse the sort direction from its current state
  *
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index f21ec37da89..8c22eed716b 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -324,6 +324,7 @@ extern PGDLLIMPORT int tcp_user_timeout;
 extern PGDLLIMPORT char *role_string;
 extern PGDLLIMPORT bool in_hot_standby_guc;
 extern PGDLLIMPORT bool trace_sort;
+extern PGDLLIMPORT bool enable_loser_tree;
 
 #ifdef DEBUG_BOUNDED_SORT
 extern PGDLLIMPORT bool optimize_bounded_sort;
-- 
2.34.1

