[HACKERS] SortSupport for UUID type

Peter Geoghegan Sat, 03 Oct 2015 23:22:09 -0700

Attached is SortSupport for UUID type patch. This accelerates all UUID
related sort-intense operations, making them about twice as fast with
millions of tuples. I know that Heroku has plenty of tables used by
various applications with a UUID primary key, so it will be nice to
have CREATE INDEX go so much faster in those cases. The SortSupport
routine uses abbreviation, and relies on unsigned integer comparisons.
It has a dependency on the new abbreviated key for text patch series
[1].


For full details, see commit message. It's a bit unfortunate that I
have yet another sort patch here, without being much closer to getting
every other such patch committed, but I happened to have written this
patch a while ago. I wanted to shape-up the common API that this patch
and the related text patch use, so it just made sense to submit the
UUID patch now.

This patch is not all that exciting -- the techniques used here are
very simple, and it's all familiar territory for us. I expect that
this patch will not be at all contentious when someone eventually gets
around to reviewing it.

[1] 
http://www.postgresql.org/message-id/CAM3SWZSTKmOnosidBYeCY9pJT=hhguwwmxxpwsejb7kf2og...@mail.gmail.com
-- 
Peter Geoghegan

From 3af1c189a89fdb332a3eb23e98e9ff3cdfbb7c25 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <[email protected]>
Date: Tue, 21 Jul 2015 16:18:25 -0700
Subject: [PATCH 4/4] Add SortSupport routine for UUID data type

This introduces a simple encoding scheme to produce abbreviated keys:
pack as many bytes of each UUID as will fit into a Datum.  On
little-endian machines, a byteswap is also performed; the abbreviated
comparator can therefore just consist of a simple 3-way unsigned integer
comparison.  This comports with regular UUID comparisons, because they
use memcmp() to compare authoritative UUID datums.

Note:  This patch depends on "Add BSWAP64() byte-swapping macro" patch,
which was independently submitted as infrastructure for another feature
patch that speeds up text sorts.
---
 src/backend/utils/adt/uuid.c    | 179 ++++++++++++++++++++++++++++++++++++++++
 src/include/catalog/pg_amproc.h |   1 +
 src/include/catalog/pg_proc.h   |   2 +
 src/include/utils/builtins.h    |   1 +
 4 files changed, 183 insertions(+)

diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index bb76b8d..ff02c73 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -14,8 +14,11 @@
 #include "postgres.h"
 
 #include "access/hash.h"
+#include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
 #include "utils/builtins.h"
+#include "utils/guc.h"
+#include "utils/sortsupport.h"
 #include "utils/uuid.h"
 
 /* uuid size in bytes */
@@ -27,8 +30,21 @@ struct pg_uuid_t
 	unsigned char data[UUID_LEN];
 };
 
+/* sortsupport for uuid */
+typedef struct
+{
+	int64		input_count;	/* number of non-null values seen */
+	bool		estimating;		/* true if estimating cardinality */
+
+	hyperLogLogState abbr_card; /* cardinality estimator */
+} uuid_sortsupport_state;
+
 static void string_to_uuid(const char *source, pg_uuid_t *uuid);
 static int	uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
+static int	uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
+static int	uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
+static bool	uuid_abbrev_abort(int memtupcount, SortSupport ssup);
+static Datum	uuid_abbrev_convert(Datum original, SortSupport ssup);
 
 Datum
 uuid_in(PG_FUNCTION_ARGS)
@@ -222,6 +238,169 @@ uuid_cmp(PG_FUNCTION_ARGS)
 	PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
 }
 
+/*
+ * Sort support strategy routine
+ */
+Datum
+uuid_sortsupport(PG_FUNCTION_ARGS)
+{
+	SortSupport	ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+	ssup->comparator = uuid_fast_cmp;
+	ssup->ssup_extra = NULL;
+
+	if (ssup->abbreviate)
+	{
+		uuid_sortsupport_state	   *uss;
+		MemoryContext				oldcontext;
+
+		oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+		uss = palloc(sizeof(uuid_sortsupport_state));
+		uss->input_count = 0;
+		uss->estimating = true;
+		initHyperLogLog(&uss->abbr_card, 10);
+
+		ssup->ssup_extra = uss;
+
+		ssup->comparator = uuid_cmp_abbrev;
+		ssup->abbrev_converter = uuid_abbrev_convert;
+		ssup->abbrev_abort = uuid_abbrev_abort;
+		ssup->abbrev_full_comparator = uuid_fast_cmp;
+
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * SortSupport comparison func
+ */
+static int
+uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
+{
+	pg_uuid_t  *arg1 = DatumGetUUIDP(x);
+	pg_uuid_t  *arg2 = DatumGetUUIDP(y);
+
+	return uuid_internal_cmp(arg1, arg2);
+}
+
+/*
+ * Abbreviated key comparison func
+ */
+static int
+uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
+{
+	if (x > y)
+		return 1;
+	else if (x == y)
+		return 0;
+	else
+		return -1;
+}
+
+/*
+ * Callback for estimating effectiveness of abbreviated key optimization.
+ *
+ * We pay no attention to the cardinality of the non-abbreviated data, because
+ * there is no equality fast-path within authoritative uuid comparator.
+ */
+static bool
+uuid_abbrev_abort(int memtupcount, SortSupport ssup)
+{
+	uuid_sortsupport_state	   *uss = ssup->ssup_extra;
+	double						abbr_card;
+
+	if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
+		return false;
+
+	abbr_card = estimateHyperLogLog(&uss->abbr_card);
+
+	/*
+	 * If we have >100k distinct values, then even if we were sorting many
+	 * billion rows we'd likely still break even, and the penalty of undoing
+	 * that many rows of abbrevs would probably not be worth it.  Stop even
+	 * counting at that point.
+	 */
+	if (abbr_card > 100000.0)
+	{
+#ifdef TRACE_SORT
+		if (trace_sort)
+			elog(LOG,
+				 "uuid_abbrev: estimation ends at cardinality %f"
+				 " after " INT64_FORMAT " values (%d rows)",
+				 abbr_card, uss->input_count, memtupcount);
+#endif
+		uss->estimating = false;
+		return false;
+	}
+
+	/*
+	 * Target minimum cardinality is 1 per ~2k of non-null inputs.  0.5 row
+	 * fudge factor allows us to abort earlier on genuinely pathological data
+	 * where we've had exactly one abbreviated value in the first 2k (non-null)
+	 * rows.
+	 */
+	if (abbr_card < uss->input_count / 2000.0 + 0.5)
+	{
+#ifdef TRACE_SORT
+		if (trace_sort)
+			elog(LOG,
+				 "uuid_abbrev: aborting abbreviation at cardinality %f"
+				 " below threshold %f after " INT64_FORMAT " values (%d rows)",
+				 abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
+				 memtupcount);
+#endif
+		return true;
+	}
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG,
+			 "uuid_abbrev: cardinality %f after " INT64_FORMAT
+			 " values (%d rows)", abbr_card, uss->input_count, memtupcount);
+#endif
+
+	return false;
+}
+
+/*
+ * Conversion routine for sortsupport.  Converts original uuid representation
+ * to abbreviated key representation.  Our encoding strategy is simple -- pack
+ * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
+ * machines, the bytes are stored in reverse order), and treat it as an
+ * unsigned integer.
+ */
+static Datum
+uuid_abbrev_convert(Datum original, SortSupport ssup)
+{
+	uuid_sortsupport_state	   *uss = ssup->ssup_extra;
+	pg_uuid_t				   *authoritative = DatumGetUUIDP(original);
+	Datum						res;
+
+	memcpy((char *) &res, authoritative->data, sizeof(Datum));
+	uss->input_count += 1;
+
+	if (uss->estimating)
+	{
+		uint32		tmp;
+
+#if SIZEOF_DATUM == 8
+		tmp = ((uint32) res ^ (uint32) ((uint64) res >> 32));
+#else							/* SIZEOF_DATUM != 8 */
+		tmp = ((uint32) res);
+#endif
+
+		addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
+	}
+
+	/* Byteswap on little-endian machines: */
+	res = ABBREV_STRING_UINT(res);
+
+	return res;
+}
+
 /* hash index support */
 Datum
 uuid_hash(PG_FUNCTION_ARGS)
diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h
index b57d6e6..7db2015 100644
--- a/src/include/catalog/pg_amproc.h
+++ b/src/include/catalog/pg_amproc.h
@@ -134,6 +134,7 @@ DATA(insert (	2233   703 703 1  380 ));
 DATA(insert (	2234   704 704 1  381 ));
 DATA(insert (	2789   27 27 1 2794 ));
 DATA(insert (	2968   2950 2950 1 2960 ));
+DATA(insert (	2968   2950 2950 2 3300 ));
 DATA(insert (	2994   2249 2249 1 2987 ));
 DATA(insert (	3194   2249 2249 1 3187 ));
 DATA(insert (	3253   3220 3220 1 3251 ));
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index eb55b3a..495b801 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4467,6 +4467,8 @@ DATA(insert OID = 2958 (  uuid_gt		   PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0
 DATA(insert OID = 2959 (  uuid_ne		   PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "2950 2950" _null_ _null_ _null_ _null_ _null_ uuid_ne _null_ _null_ _null_ ));
 DATA(insert OID = 2960 (  uuid_cmp		   PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 23 "2950 2950" _null_ _null_ _null_ _null_ _null_ uuid_cmp _null_ _null_ _null_ ));
 DESCR("less-equal-greater");
+DATA(insert OID = 3300 (  uuid_sortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ uuid_sortsupport _null_ _null_ _null_ ));
+DESCR("sort support");
 DATA(insert OID = 2961 (  uuid_recv		   PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2950 "2281" _null_ _null_ _null_ _null_ _null_ uuid_recv _null_ _null_ _null_ ));
 DESCR("I/O");
 DATA(insert OID = 2962 (  uuid_send		   PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 17 "2950" _null_ _null_ _null_ _null_ _null_ uuid_send _null_ _null_ _null_ ));
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index fc1679e..cbfaa0a 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -1173,6 +1173,7 @@ extern Datum uuid_ge(PG_FUNCTION_ARGS);
 extern Datum uuid_gt(PG_FUNCTION_ARGS);
 extern Datum uuid_ne(PG_FUNCTION_ARGS);
 extern Datum uuid_cmp(PG_FUNCTION_ARGS);
+extern Datum uuid_sortsupport(PG_FUNCTION_ARGS);
 extern Datum uuid_hash(PG_FUNCTION_ARGS);
 
 /* windowfuncs.c */
-- 
1.9.1

-- 
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

[HACKERS] SortSupport for UUID type

Reply via email to