On Mon, Jun 3, 2019 at 2:39 PM Peter Geoghegan <p...@bowt.ie> wrote:

>
> As you know, it's a bit weird that we're proposing adding sort support
> with abbreviated keys for a type that is 8 bytes, since you'd expect
> it to also be pass-by-value on most platforms, which largely defeats
> the purpose of having abbreviated keys (though sort support could
> still make sense, for the same reason it makes sense to have it for
> int8). However, macaddr8 isn't actually pass-by-value, and it seems
> too late to do anything about that now, so abbreviated keys actually
> make sense.
>
>
so, if what you say is true and it is either not worth it or
potentially a breaking change to make macaddr8 pass-by-value and
adding abbreviated sort support has the potential to avoid "pointer
chasing" and guarantee equivalent relative performance for macaddr8
and macaddr, then that seems worth it.

With regard to macaddr8_abbrev_convert() and memset(), I attached a patch
with the memset() removed, since it is superfluous here.

macaddr8_cmp_internal() already existed before this patch and I noticed
that it explicitly returns int32 whereas the return type of
macaddr_cmp_internal() is just specified as an int. I was wondering why.

I also noticed that the prototype for macaddr8_cmp_internal() was not
at the top of the file with the other static function prototypes. I
added it there, but I wasn't sure if there was some reason that it was
like that to begin with.

-- 
Melanie Plageman
From 5ed80391a70bb4048d3cb4c66c8a3343318144e0 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplage...@gmail.com>
Date: Fri, 31 May 2019 15:48:55 -0400
Subject: [PATCH v2 1/1] Implement SortSupport for macaddr8 data type

Introduces a scheme to produce abbreviated keys for the macaddr8 type.

Co-authored-by: Peter Geoghegan <p...@bowt.ie>
---
 src/backend/utils/adt/mac8.c      | 202 ++++++++++++++++++++++++++++++
 src/include/catalog/pg_amproc.dat |   3 +
 src/include/catalog/pg_proc.dat   |   3 +
 3 files changed, 208 insertions(+)

diff --git a/src/backend/utils/adt/mac8.c b/src/backend/utils/adt/mac8.c
index 0b1fe4978e..0a869fe323 100644
--- a/src/backend/utils/adt/mac8.c
+++ b/src/backend/utils/adt/mac8.c
@@ -21,10 +21,14 @@
 
 #include "postgres.h"
 
+#include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
+#include "port/pg_bswap.h"
 #include "utils/builtins.h"
+#include "utils/guc.h"
 #include "utils/hashutils.h"
 #include "utils/inet.h"
+#include "utils/sortsupport.h"
 
 /*
  *	Utility macros used for sorting and comparing:
@@ -48,6 +52,20 @@ static const signed char hexlookup[128] = {
 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
 };
 
+/* sortsupport for macaddr8 */
+typedef struct
+{
+	int64		input_count;	/* number of non-null values seen */
+	bool		estimating;		/* true if estimating cardinality */
+
+	hyperLogLogState abbr_card; /* cardinality estimator */
+} macaddr8_sortsupport_state;
+
+static int32 macaddr8_cmp_internal(macaddr8 *a1, macaddr8 *a2);
+static int	macaddr8_fast_cmp(Datum x, Datum y, SortSupport ssup);
+static int	macaddr8_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
+static bool macaddr8_abbrev_abort(int memtupcount, SortSupport ssup);
+static Datum macaddr8_abbrev_convert(Datum original, SortSupport ssup);
 /*
  * hex2_to_uchar - convert 2 hex digits to a byte (unsigned char)
  *
@@ -575,3 +593,187 @@ macaddr8tomacaddr(PG_FUNCTION_ARGS)
 
 	PG_RETURN_MACADDR_P(result);
 }
+
+/*
+ * SortSupport strategy function. Populates a SortSupport struct with the
+ * information necessary to use comparison by abbreviated keys.
+ */
+Datum
+macaddr8_sortsupport(PG_FUNCTION_ARGS)
+{
+	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+	ssup->comparator = macaddr8_fast_cmp;
+	ssup->ssup_extra = NULL;
+
+	if (ssup->abbreviate)
+	{
+		macaddr8_sortsupport_state *uss;
+		MemoryContext oldcontext;
+
+		oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+		uss = palloc(sizeof(macaddr8_sortsupport_state));
+		uss->input_count = 0;
+		uss->estimating = true;
+		initHyperLogLog(&uss->abbr_card, 10);
+
+		ssup->ssup_extra = uss;
+
+		ssup->comparator = macaddr8_cmp_abbrev;
+		ssup->abbrev_converter = macaddr8_abbrev_convert;
+		ssup->abbrev_abort = macaddr8_abbrev_abort;
+		ssup->abbrev_full_comparator = macaddr8_fast_cmp;
+
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * SortSupport "traditional" comparison function. Pulls two 8-byte MAC
+ * addresses from the heap and runs a standard comparison on them.
+ */
+static int
+macaddr8_fast_cmp(Datum x, Datum y, SortSupport ssup)
+{
+	macaddr8    *arg1 = DatumGetMacaddr8P(x);
+	macaddr8    *arg2 = DatumGetMacaddr8P(y);
+
+	return macaddr8_cmp_internal(arg1, arg2);
+}
+
+/*
+ * SortSupport abbreviated key comparison function. Compares two 8-byte MAC
+ * addresses quickly by treating them like integers, and without having to go
+ * to the heap.
+ */
+static int
+macaddr8_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
+{
+	if (x > y)
+		return 1;
+	else if (x == y)
+		return 0;
+	else
+		return -1;
+}
+
+/*
+ * Callback for estimating effectiveness of abbreviated key optimization.
+ *
+ * We pay no attention to the cardinality of the non-abbreviated data, because
+ * there is no equality fast-path within authoritative macaddr8 comparator.
+ */
+static bool
+macaddr8_abbrev_abort(int memtupcount, SortSupport ssup)
+{
+	macaddr8_sortsupport_state *uss = ssup->ssup_extra;
+	double		abbr_card;
+
+	if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
+		return false;
+
+	abbr_card = estimateHyperLogLog(&uss->abbr_card);
+
+	/*
+	 * If we have >100k distinct values, then even if we were sorting many
+	 * billion rows we'd likely still break even, and the penalty of undoing
+	 * that many rows of abbrevs would probably not be worth it. At this point
+	 * we stop counting because we know that we're now fully committed.
+	 */
+	if (abbr_card > 100000.0)
+	{
+#ifdef TRACE_SORT
+		if (trace_sort)
+			elog(LOG,
+				 "macaddr8_abbrev: estimation ends at cardinality %f"
+				 " after " INT64_FORMAT " values (%d rows)",
+				 abbr_card, uss->input_count, memtupcount);
+#endif
+		uss->estimating = false;
+		return false;
+	}
+
+	/*
+	 * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
+	 * fudge factor allows us to abort earlier on genuinely pathological data
+	 * where we've had exactly one abbreviated value in the first 2k
+	 * (non-null) rows.
+	 */
+	if (abbr_card < uss->input_count / 2000.0 + 0.5)
+	{
+#ifdef TRACE_SORT
+		if (trace_sort)
+			elog(LOG,
+				 "macaddr8_abbrev: aborting abbreviation at cardinality %f"
+				 " below threshold %f after " INT64_FORMAT " values (%d rows)",
+				 abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
+				 memtupcount);
+#endif
+		return true;
+	}
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG,
+			 "macaddr8_abbrev: cardinality %f after " INT64_FORMAT
+			 " values (%d rows)", abbr_card, uss->input_count, memtupcount);
+#endif
+
+	return false;
+}
+
+/*
+ * SortSupport conversion routine. Converts original macaddr8 representation
+ * to abbreviated key representation.
+ *
+ * Packs the bytes of a 8-byte MAC address into a Datum and treats it as an
+ * unsigned integer for purposes of comparison. The integer is converted to
+ * native endianness to facilitate easy comparison.
+ */
+static Datum
+macaddr8_abbrev_convert(Datum original, SortSupport ssup)
+{
+	macaddr8_sortsupport_state *uss = ssup->ssup_extra;
+	macaddr8    *authoritative = DatumGetMacaddr8P(original);
+	Datum		res;
+
+#if SIZEOF_DATUM == 8
+	memcpy(&res, authoritative, sizeof(macaddr8));
+#else							/* SIZEOF_DATUM != 8 */
+	memcpy(&res, authoritative, SIZEOF_DATUM);
+#endif
+	uss->input_count += 1;
+
+	/*
+	 * Cardinality estimation. The estimate uses uint32, so on a 64-bit
+	 * architecture, XOR the two 32-bit halves together to produce slightly
+	 * more entropy.
+	 */
+	if (uss->estimating)
+	{
+		uint32		tmp;
+
+#if SIZEOF_DATUM == 8
+		tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
+#else							/* SIZEOF_DATUM != 8 */
+		tmp = (uint32) res;
+#endif
+
+		addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
+	}
+
+	/*
+	 * Byteswap on little-endian machines.
+	 *
+	 * This is needed so that macaddr8_cmp_abbrev() (an unsigned integer 3-way
+	 * comparator) works correctly on all platforms. Without this, the
+	 * comparator would have to call memcmp() with a pair of pointers to the
+	 * first byte of each abbreviated key, which is slower.
+	 */
+	res = DatumBigEndianToNative(res);
+
+	return res;
+}
diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat
index 020b7413cc..d77d98cbd1 100644
--- a/src/include/catalog/pg_amproc.dat
+++ b/src/include/catalog/pg_amproc.dat
@@ -214,6 +214,9 @@
   amprocrighttype => 'pg_lsn', amprocnum => '1', amproc => 'pg_lsn_cmp' },
 { amprocfamily => 'btree/macaddr8_ops', amproclefttype => 'macaddr8',
   amprocrighttype => 'macaddr8', amprocnum => '1', amproc => 'macaddr8_cmp' },
+{ amprocfamily => 'btree/macaddr8_ops', amproclefttype => 'macaddr8',
+  amprocrighttype => 'macaddr8', amprocnum => '2',
+  amproc => 'macaddr8_sortsupport' },
 { amprocfamily => 'btree/enum_ops', amproclefttype => 'anyenum',
   amprocrighttype => 'anyenum', amprocnum => '1', amproc => 'enum_cmp' },
 { amprocfamily => 'btree/tsvector_ops', amproclefttype => 'tsvector',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 87335248a0..2fe573e418 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -3865,6 +3865,9 @@
 { oid => '4125', descr => 'set 7th bit in macaddr8',
   proname => 'macaddr8_set7bit', prorettype => 'macaddr8',
   proargtypes => 'macaddr8', prosrc => 'macaddr8_set7bit' },
+{ oid => '4142', descr => 'sort support',
+  proname => 'macaddr8_sortsupport', prorettype => 'void',
+  proargtypes => 'internal', prosrc => 'macaddr8_sortsupport' },
 
 # for inet type support
 { oid => '910', descr => 'I/O',
-- 
2.21.0

Reply via email to