commit 4a4fb12548eb2533f8804a691daad4bfb53302f4
Author: Robert Haas <rhaas@postgresql.org>
Date:   Tue Feb 14 23:45:06 2012 -0500

    Revert "Revert "Multi-key specializations.""
    
    This reverts commit 6319911c04ab827449aab0c68cb7d5966825746e.

diff --git a/src/backend/utils/sort/gen_qsort_tuple.pl b/src/backend/utils/sort/gen_qsort_tuple.pl
index 40d5548..a31a3e2 100755
--- a/src/backend/utils/sort/gen_qsort_tuple.pl
+++ b/src/backend/utils/sort/gen_qsort_tuple.pl
@@ -49,6 +49,25 @@ print <<'EOM';
 EOM
 emit_qsort_implementation();
 
+# Emit the 'heap' specialization; cmp_heap compares the leading key inline.
+$SUFFIX = 'heap';
+$EXTRAARGS = ', Tuplesortstate *state';
+$EXTRAPARAMS = ', state';
+$CMPPARAMS = ', state';
+print <<'EOM';
+static pg_always_inline int
+cmp_heap(SortTuple *a, SortTuple *b, Tuplesortstate *state)
+{
+	int leading = ApplySortComparator(a->datum1, a->isnull1,
+									  b->datum1, b->isnull1, state->sortKeys);
+
+	if (leading != 0)
+		return leading;
+	return state->comparetup_rest(a, b, state);
+}
+EOM
+emit_qsort_implementation();
+
 sub emit_qsort_boilerplate
 {
 	print <<'EOM';
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 10b19c0..cdaa230 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -229,6 +229,12 @@ struct Tuplesortstate
 	SortTupleComparator	comparetup;
 
 	/*
+	 * Function to compare the non-leading columns of two tuples, as per
+	 * above.
+	 */
+	SortTupleComparator	comparetup_rest;
+
+	/*
 	 * Function to copy a supplied input tuple into palloc'd space and set up
 	 * its SortTuple representation (ie, set tuple/datum1/isnull1).  Also,
 	 * state->availMem must be decreased by the amount of space used for the
@@ -462,6 +468,8 @@ static unsigned int getlen(Tuplesortstate *state, int tapenum, bool eofOK);
 static void markrunend(Tuplesortstate *state, int tapenum);
 static int comparetup_heap(const SortTuple *a, const SortTuple *b,
 				Tuplesortstate *state);
+static int comparetup_heap_rest(const SortTuple *a, const SortTuple *b,
+				Tuplesortstate *state);
 static void copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup);
 static void writetup_heap(Tuplesortstate *state, int tapenum,
 			  SortTuple *stup);
@@ -615,6 +623,7 @@ tuplesort_begin_heap(TupleDesc tupDesc,
 								randomAccess);
 
 	state->comparetup = comparetup_heap;
+	state->comparetup_rest = comparetup_heap_rest;
 	state->copytup = copytup_heap;
 	state->writetup = writetup_heap;
 	state->readtup = readtup_heap;
@@ -1238,6 +1247,8 @@ tuplesort_performsort(Tuplesortstate *state)
 				if (state->onlyKey != NULL)
 					qsort_ssup(state->memtuples, state->memtupcount,
 							   state->onlyKey);
+				else if (state->comparetup_rest != NULL)
+					qsort_heap(state->memtuples, state->memtupcount, state);
 				else
 					qsort_tuple(state->memtuples,
 								state->memtupcount,
@@ -2712,6 +2723,49 @@ comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
 	return 0;
 }
 
+/*
+ * Tiebreak comparator for heap sorts: compare the second and later sort keys
+ * of two tuples, returning the first nonzero result, or 0 if all are equal.
+ */
+static int
+comparetup_heap_rest(const SortTuple *a, const SortTuple *b,
+					 Tuplesortstate *state)
+{
+	TupleDesc	tupDesc = state->tupDesc;
+	SortSupport	sortKey;
+	HeapTupleData ltup;
+	HeapTupleData rtup;
+	int			nkey;
+
+	/*
+	 * We don't have to compare the leading sort key, because qsort_heap will
+	 * have already done that.  Our job is to compare the remaining sort keys
+	 * to break ties, so the loop below starts at the second sort key.
+	 */
+	/* Wrap each MinimalTuple in a HeapTupleData so heap_getattr can read it */
+	ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
+	ltup.t_data = (HeapTupleHeader) ((char *) a->tuple - MINIMAL_TUPLE_OFFSET);
+	rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
+	rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET);
+
+	sortKey = state->sortKeys + 1;
+	for (nkey = 1; nkey < state->nKeys; nkey++, sortKey++)
+	{
+		AttrNumber	attno = sortKey->ssup_attno;
+		bool		lnull;
+		bool		rnull;
+		Datum		lval = heap_getattr(&ltup, attno, tupDesc, &lnull);
+		Datum		rval = heap_getattr(&rtup, attno, tupDesc, &rnull);
+		int32		result = ApplySortComparator(lval, lnull,
+												 rval, rnull, sortKey);
+
+		if (result != 0)
+			return result;
+	}
+
+	return 0;
+}
+
 static void
 copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup)
 {
diff --git a/src/include/c.h b/src/include/c.h
index 7396adb..ecb1a60 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -850,4 +850,18 @@ extern int	fdatasync(int fildes);
 /* /port compatibility functions */
 #include "port.h"
 
+/*
+ * Define a cross-platform "always-inline" macro (MSVC's __forceinline is a
+ * keyword, hence the _MSC_VER test).  Use this very sharp tool judiciously.
+ */
+#ifdef __always_inline
+#define pg_always_inline __always_inline
+#elif defined(_MSC_VER)
+#define pg_always_inline __forceinline
+#elif defined(__GNUC__)
+#define pg_always_inline inline __attribute__((always_inline))
+#else
+#define pg_always_inline inline
+#endif
+
 #endif   /* C_H */
