diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 65e9af8..15aaaef 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -29,6 +29,7 @@
 #include "utils/bytea.h"
 #include "utils/lsyscache.h"
 #include "utils/pg_locale.h"
+#include "utils/sortsupport.h"
 
 
 /* GUC variable */
@@ -50,12 +51,32 @@ typedef struct
 	int			skiptable[256]; /* skip distance for given mismatched char */
 } TextPositionState;
 
+typedef struct
+{
+	char	   *buf1;
+	char	   *buf2;
+	int			buflen1;
+	int			buflen2;
+#ifdef HAVE_LOCALE_T
+	pg_locale_t locale;
+#endif
+} TextSortSupport;
+
+/*
+ * This should be large enough that most strings will be fit, but small
+ * enough that we feel comfortable putting it on the stack.  It should also
+ * be a power of 2, since we pass it to TYPEALIGN().
+ */
+#define TEXTBUFLEN		1024
+
 #define DatumGetUnknownP(X)			((unknown *) PG_DETOAST_DATUM(X))
 #define DatumGetUnknownPCopy(X)		((unknown *) PG_DETOAST_DATUM_COPY(X))
 #define PG_GETARG_UNKNOWN_P(n)		DatumGetUnknownP(PG_GETARG_DATUM(n))
 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
 #define PG_RETURN_UNKNOWN_P(x)		PG_RETURN_POINTER(x)
 
+static int bttextfastcmp_c(Datum x, Datum y, SortSupport ssup);
+static int bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup);
 static int	text_cmp(text *arg1, text *arg2, Oid collid);
 static int32 text_length(Datum str);
 static int	text_position(text *t1, text *t2);
@@ -1340,10 +1361,8 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
 	}
 	else
 	{
-#define STACKBUFLEN		1024
-
-		char		a1buf[STACKBUFLEN];
-		char		a2buf[STACKBUFLEN];
+		char		a1buf[TEXTBUFLEN];
+		char		a2buf[TEXTBUFLEN];
 		char	   *a1p,
 				   *a2p;
 #ifdef HAVE_LOCALE_T
@@ -1376,24 +1395,24 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
 			int			a2len;
 			int			r;
 
-			if (len1 >= STACKBUFLEN / 2)
+			if (len1 >= TEXTBUFLEN / 2)
 			{
 				a1len = len1 * 2 + 2;
 				a1p = palloc(a1len);
 			}
 			else
 			{
-				a1len = STACKBUFLEN;
+				a1len = TEXTBUFLEN;
 				a1p = a1buf;
 			}
-			if (len2 >= STACKBUFLEN / 2)
+			if (len2 >= TEXTBUFLEN / 2)
 			{
 				a2len = len2 * 2 + 2;
 				a2p = palloc(a2len);
 			}
 			else
 			{
-				a2len = STACKBUFLEN;
+				a2len = TEXTBUFLEN;
 				a2p = a2buf;
 			}
 
@@ -1458,11 +1477,11 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
 		}
 #endif   /* WIN32 */
 
-		if (len1 >= STACKBUFLEN)
+		if (len1 >= TEXTBUFLEN)
 			a1p = (char *) palloc(len1 + 1);
 		else
 			a1p = a1buf;
-		if (len2 >= STACKBUFLEN)
+		if (len2 >= TEXTBUFLEN)
 			a2p = (char *) palloc(len2 + 1);
 		else
 			a2p = a2buf;
@@ -1666,6 +1685,176 @@ bttextcmp(PG_FUNCTION_ARGS)
 	PG_RETURN_INT32(result);
 }
 
+Datum
+bttextsortsupport(PG_FUNCTION_ARGS)
+{
+	SortSupport		ssup = (SortSupport) PG_GETARG_POINTER(0);
+	Oid				collid = ssup->ssup_collation;
+	TextSortSupport	   *tss;
+
+	/*
+	 * If LC_COLLATE = C, we can make things quite a bit faster by using
+	 * memcmp() rather than strcoll().  To minimize the per-comparison
+	 * overhead, we make this decision just once for the whole sort.
+	 */
+	if (lc_collate_is_c(collid))
+	{
+		ssup->comparator = bttextfastcmp_c;
+		PG_RETURN_VOID();
+	}
+
+	/*
+	 * WIN32 requires complex hacks when the database encoding is UTF-8
+	 * (except when using the "C" collation).  For now, we don't optimize
+	 * that case.  If we wanted to do this in the future, it would make sense
+	 * to have a third comparator function (perhaps bttextfastcmp_win32_utf8)
+	 * just for this case.
+	 */
+#ifdef WIN32
+	if (GetDatabaseEncoding() == PG_UTF8)
+		PG_RETURN_VOID();
+#endif /* WIN32 */
+
+	/*
+	 * We need a collation-sensitive comparison.  To make things faster,
+	 * we'll figure out the collation based on the locale id and cache the
+	 * result.  Also, since strcoll() requires NUL-terminated inputs, we'll
+	 * prepare a set of palloc'd buffers to use as temporary workspace.
+	 * In the ad-hoc comparison case we only use palloc'd buffers when we
+	 * need more space than we're comfortable allocating on the stack, but
+	 * here we can keep the buffers around for the whole sort, so it makes
+	 * sense to allocate them once and use them unconditionally.  We'll
+	 * expand them on the fly if needed.
+	 */
+	tss = MemoryContextAlloc(ssup->ssup_cxt, sizeof(TextSortSupport));
+	tss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, TEXTBUFLEN);
+	tss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, TEXTBUFLEN);
+	tss->buflen1 = TEXTBUFLEN;
+	tss->buflen2 = TEXTBUFLEN;
+#ifdef HAVE_LOCALE_T
+	tss->locale = 0;
+#endif
+
+	if (collid != DEFAULT_COLLATION_OID)
+	{
+		if (!OidIsValid(collid))
+		{
+			/*
+			 * This typically means that the parser could not resolve a
+			 * conflict of implicit collations, so report it that way.
+			 */
+			ereport(ERROR,
+					(errcode(ERRCODE_INDETERMINATE_COLLATION),
+					 errmsg("could not determine which collation to use for string comparison"),
+					 errhint("Use the COLLATE clause to set the collation explicitly.")));
+		}
+#ifdef HAVE_LOCALE_T
+		tss->locale = pg_newlocale_from_collation(collid);
+#endif
+	}
+
+	ssup->ssup_extra = tss;
+	ssup->comparator = bttextfastcmp_locale;
+
+	PG_RETURN_VOID();
+}
+
+static int
+bttextfastcmp_c(Datum x, Datum y, SortSupport ssup)
+{
+	text	   *arg1 = DatumGetTextPP(x);
+	text	   *arg2 = DatumGetTextPP(y);
+	char	   *a1p,
+			   *a2p;
+	int			len1,
+				len2,
+				result;
+
+	a1p = VARDATA_ANY(arg1);
+	a2p = VARDATA_ANY(arg2);
+
+	len1 = VARSIZE_ANY_EXHDR(arg1);
+	len2 = VARSIZE_ANY_EXHDR(arg2);
+
+	result = memcmp(a1p, a2p, Min(len1, len2));
+	if ((result == 0) && (len1 != len2))
+		result = (len1 < len2) ? -1 : 1;
+
+	/* We can't afford to leak memory here. */
+	if (PointerGetDatum(arg1) != x)
+		pfree(arg1);
+	if (PointerGetDatum(arg2) != y)
+		pfree(arg2);
+
+	return result;
+}
+
+static int
+bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
+{
+	text	   *arg1 = DatumGetTextPP(x);
+	text	   *arg2 = DatumGetTextPP(y);
+	TextSortSupport	   *tss = (TextSortSupport *) ssup->ssup_extra;
+	char	   *a1p,
+			   *a2p;
+	int			len1,
+				len2,
+				result;
+
+	a1p = VARDATA_ANY(arg1);
+	a2p = VARDATA_ANY(arg2);
+
+	len1 = VARSIZE_ANY_EXHDR(arg1);
+	len2 = VARSIZE_ANY_EXHDR(arg2);
+
+	/*
+	 * We avoid repeated palloc/pfree on long strings by keeping the buffers
+	 * we allocate around for the duration of the sort.  When we expand them,
+	 * we round off the to the next multiple of TEXTBUFLEN in order to avoid
+	 * repeatedly expanding them by very small amounts.
+	 */
+	if (len1 >= tss->buflen1)
+	{
+		pfree(tss->buf1);
+		tss->buflen1 = TYPEALIGN(TEXTBUFLEN, len1);
+		tss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen1);
+	}
+	if (len2 >= tss->buflen2)
+	{
+		pfree(tss->buf2);
+		tss->buflen2 = TYPEALIGN(TEXTBUFLEN, len2);
+		tss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen2);
+	}
+
+	memcpy(tss->buf1, a1p, len1);
+	tss->buf1[len1] = '\0';
+	memcpy(tss->buf2, a2p, len2);
+	tss->buf2[len2] = '\0';
+
+#ifdef HAVE_LOCALE_T
+	if (tss->locale)
+		result = strcoll_l(tss->buf1, tss->buf2, tss->locale);
+	else
+#endif
+		result = strcoll(tss->buf1, tss->buf2);
+
+	/*
+	 * In some locales strcoll() can claim that nonidentical strings are
+	 * equal.  Believing that would be bad news for a number of reasons,
+	 * so we follow Perl's lead and sort "equal" strings according to
+	 * strcmp().
+	 */
+	if (result == 0)
+		result = strcmp(tss->buf1, tss->buf2);
+
+	/* We can't afford to leak memory here. */
+	if (PointerGetDatum(arg1) != x)
+		pfree(arg1);
+	if (PointerGetDatum(arg2) != y)
+		pfree(arg2);
+
+	return result;
+}
 
 Datum
 text_larger(PG_FUNCTION_ARGS)
diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h
index 72307de..89f0958 100644
--- a/src/include/catalog/pg_amproc.h
+++ b/src/include/catalog/pg_amproc.h
@@ -123,6 +123,7 @@ DATA(insert (	1989   26 26 2 3134 ));
 DATA(insert (	1991   30 30 1 404 ));
 DATA(insert (	2994   2249 2249 1 2987 ));
 DATA(insert (	1994   25 25 1 360 ));
+DATA(insert (	1994   25 25 2 3935 ));
 DATA(insert (	1996   1083 1083 1 1107 ));
 DATA(insert (	2000   1266 1266 1 1358 ));
 DATA(insert (	2002   1562 1562 1 1672 ));
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 8700d0d..bb51a17 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -606,6 +606,8 @@ DATA(insert OID = 3135 ( btnamesortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i
 DESCR("sort support");
 DATA(insert OID = 360 (  bttextcmp		   PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "25 25" _null_ _null_ _null_ _null_ bttextcmp _null_ _null_ _null_ ));
 DESCR("less-equal-greater");
+DATA(insert OID = 3935 ( bttextsortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2278 "2281" _null_ _null_ _null_ _null_ bttextsortsupport _null_ _null_ _null_ ));
+DESCR("sort support");
 DATA(insert OID = 377 (  cash_cmp		   PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "790 790" _null_ _null_ _null_ _null_ cash_cmp _null_ _null_ _null_ ));
 DESCR("less-equal-greater");
 DATA(insert OID = 380 (  btreltimecmp	   PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "703 703" _null_ _null_ _null_ _null_ btreltimecmp _null_ _null_ _null_ ));
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index fe253bc..b546c7c 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -309,6 +309,7 @@ extern Datum bttintervalcmp(PG_FUNCTION_ARGS);
 extern Datum btcharcmp(PG_FUNCTION_ARGS);
 extern Datum btnamecmp(PG_FUNCTION_ARGS);
 extern Datum bttextcmp(PG_FUNCTION_ARGS);
+extern Datum bttextsortsupport(PG_FUNCTION_ARGS);
 
 /*
  *		Per-opclass sort support functions for new btrees.  Like the