Hello,

I've attached a patch to add SortSupport for Postgres' macaddr which has the
effect of improving the performance of sorting operations for the type. The
strategy that I employ is very similar to that for UUID, which is to create
abbreviated keys by packing as many bytes from the MAC address as possible into
Datums, and then performing fast unsigned integer comparisons while sorting.

I ran some informal local benchmarks, and for cardinality greater than 100k
rows, I see a speed up on `CREATE INDEX` of roughly 25% to 65%. (For those
interested, I put a few more numbers into a small report here [2].)

Admittedly, this is not quite as useful as speeding up sorting on a more common
data type like TEXT or UUID, but the change still seems like a useful
performance improvement. I largely wrote it as an exercise to familiarize
myself with the Postgres codebase.

I'll add an entry into the current commitfest as suggested by the Postgres Wiki
and follow up here with a link.

Thanks, and if anyone has feedback or other thoughts, let me know!

Brandur

[1] 
https://www.postgresql.org/message-id/CAM3SWZR4avsTwwNVUzRNbHk8v36W-QBqpoKg=ogkwwy0dkt...@mail.gmail.com
[2] 
https://docs.google.com/spreadsheets/d/1cUqbg9RTgSo16WrrfJJwDP1eDaWL3TNOxnIOFNpfwgA/edit?usp=sharing
>From 73f7dbf1921eea7106c93654dd7bf1fdd0888a07 Mon Sep 17 00:00:00 2001
From: Brandur <bran...@mutelight.org>
Date: Tue, 16 Aug 2016 17:07:07 -0700
Subject: [PATCH] Implement SortSupport for macaddr data type

Introduces a scheme to produce abbreviated keys for the macaddr type
which involves packing as many of a MAC address' six bytes as possible
into a Datum (and adding two zeroed bytes of padding on a 64-bit
system). Sorting routines can then take advantage of the abbreviated
keys by performing fast unsigned integer comparisons, resulting in a
significant performance increase when operating on > 100k rows or more.

This implementation is modeled closely on the addition of SortSupport
for the UUID data type, which employs a very similar strategy.
---
 src/backend/utils/adt/mac.c     | 210 ++++++++++++++++++++++++++++++++++++++++
 src/include/catalog/pg_amproc.h |   1 +
 src/include/catalog/pg_proc.h   |   2 +
 src/include/utils/builtins.h    |   1 +
 4 files changed, 214 insertions(+)

diff --git a/src/backend/utils/adt/mac.c b/src/backend/utils/adt/mac.c
index 509315a..471c3f0 100644
--- a/src/backend/utils/adt/mac.c
+++ b/src/backend/utils/adt/mac.c
@@ -7,9 +7,13 @@
 #include "postgres.h"
 
 #include "access/hash.h"
+#include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
+#include "port/pg_bswap.h"
 #include "utils/builtins.h"
+#include "utils/guc.h"
 #include "utils/inet.h"
+#include "utils/sortsupport.h"
 
 
 /*
@@ -22,6 +26,21 @@
 #define lobits(addr) \
   ((unsigned long)(((addr)->d<<16)|((addr)->e<<8)|((addr)->f)))
 
+/* sortsupport for macaddr */
+typedef struct
+{
+       int64           input_count;    /* number of non-null values seen */
+       bool            estimating;             /* true if estimating 
cardinality */
+
+       hyperLogLogState abbr_card; /* cardinality estimator */
+} macaddr_sortsupport_state;
+
+static int macaddr_cmp_internal(macaddr *a1, macaddr *a2);
+static int macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup);
+static int macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
+static bool macaddr_abbrev_abort(int memtupcount, SortSupport ssup);
+static Datum macaddr_abbrev_convert(Datum original, SortSupport ssup);
+
 /*
  *     MAC address reader.  Accepts several common notations.
  */
@@ -318,3 +337,194 @@ macaddr_trunc(PG_FUNCTION_ARGS)
 
        PG_RETURN_MACADDR_P(result);
 }
+
+/*
+ * SortSupport strategy function. Populates a SortSupport struct with the
+ * information necessary to use comparison by abbreviated keys.
+ */
+Datum
+macaddr_sortsupport(PG_FUNCTION_ARGS)
+{
+       SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+       ssup->comparator = macaddr_fast_cmp;
+       ssup->ssup_extra = NULL;
+
+       if (ssup->abbreviate)
+       {
+               macaddr_sortsupport_state *uss;
+               MemoryContext oldcontext;
+
+               oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+               uss = palloc(sizeof(macaddr_sortsupport_state));
+               uss->input_count = 0;
+               uss->estimating = true;
+               initHyperLogLog(&uss->abbr_card, 10);
+
+               ssup->ssup_extra = uss;
+
+               ssup->comparator = macaddr_cmp_abbrev;
+               ssup->abbrev_converter = macaddr_abbrev_convert;
+               ssup->abbrev_abort = macaddr_abbrev_abort;
+               ssup->abbrev_full_comparator = macaddr_fast_cmp;
+
+               MemoryContextSwitchTo(oldcontext);
+       }
+
+       PG_RETURN_VOID();
+}
+
+/*
+ * SortSupport "traditional" comparison function. Pulls two MAC addresses from
+ * the heap and runs a standard comparison on them.
+ */
+static int
+macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup)
+{
+       macaddr *arg1 = DatumGetMacaddrP(x);
+       macaddr *arg2 = DatumGetMacaddrP(y);
+
+       return macaddr_cmp_internal(arg1, arg2);
+}
+
+/*
+ * SortSupport abbreviated key comparison function. Compares two MAC addresses
+ * quickly by treating them like integers, and without having to go to the
+ * heap.
+ */
+static int
+macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
+{
+       if (x > y)
+               return 1;
+       else if (x == y)
+               return 0;
+       else
+               return -1;
+}
+
+/*
+ * Callback for estimating effectiveness of abbreviated key optimization.
+ *
+ * We pay no attention to the cardinality of the non-abbreviated data, because
+ * there is no equality fast-path within the standard macaddr comparator.
+ */
+static bool
+macaddr_abbrev_abort(int memtupcount, SortSupport ssup)
+{
+       macaddr_sortsupport_state *uss = ssup->ssup_extra;
+       double abbr_card;
+
+       if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
+               return false;
+
+       abbr_card = estimateHyperLogLog(&uss->abbr_card);
+
+       /*
+        * If we have >100k distinct values, then even if we were sorting many
+        * billion rows we'd likely still break even, and the penalty of undoing
+        * that many rows of abbrevs would probably not be worth it. At this 
point
+        * we stop counting because we know that we're now fully committed.
+        */
+       if (abbr_card > 100000.0)
+       {
+#ifdef TRACE_SORT
+               if (trace_sort)
+                       elog(LOG,
+                                "macaddr_abbrev: estimation ends at 
cardinality %f"
+                                " after " INT64_FORMAT " values (%d rows)",
+                                abbr_card, uss->input_count, memtupcount);
+#endif
+               uss->estimating = false;
+               return false;
+       }
+
+       /*
+        * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
+        * fudge factor allows us to abort earlier on genuinely pathological 
data
+        * where we've had exactly one abbreviated value in the first 2k
+        * (non-null) rows.
+        */
+       if (abbr_card < uss->input_count / 2000.0 + 0.5)
+       {
+#ifdef TRACE_SORT
+               if (trace_sort)
+                       elog(LOG,
+                                "macaddr_abbrev: aborting abbreviation at 
cardinality %f"
+                                " below threshold %f after " INT64_FORMAT " 
values (%d rows)",
+                                abbr_card, uss->input_count / 2000.0 + 0.5, 
uss->input_count,
+                                memtupcount);
+#endif
+               return true;
+       }
+
+#ifdef TRACE_SORT
+       if (trace_sort)
+               elog(LOG,
+                        "macaddr_abbrev: cardinality %f after " INT64_FORMAT
+                        " values (%d rows)", abbr_card, uss->input_count, 
memtupcount);
+#endif
+
+       return false;
+}
+
+/*
+ * SortSupport converstion routine. Converts original macaddr representation
+ * to abbreviated key representation.
+ *
+ * Packs as many bytes from a 6-byte MAC addresses into a Datum and treats it
+ * as an unsigned integer for purposes of comparison. On a 64-bit machine,
+ * there will be two zeroed bytes of padding.
+ */
+static Datum
+macaddr_abbrev_convert(Datum original, SortSupport ssup)
+{
+       macaddr_sortsupport_state *uss = ssup->ssup_extra;
+       macaddr *authoritative = DatumGetMacaddrP(original);
+       Datum res;
+
+       /*
+        * On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes 
of
+        * the MAC address in. There will be two bytes of zero padding on the 
least
+        * significant end.
+        */
+#if SIZEOF_DATUM == 8
+       memset(&res, 0, SIZEOF_DATUM);
+       memcpy(&res, authoritative, sizeof(macaddr));
+#else                                                  /* SIZEOF_DATUM != 8 */
+       memcpy(&res, authoritative, SIZEOF_DATUM);
+#endif
+       uss->input_count += 1;
+
+       /*
+        * Cardinality estimation. The estimate uses uint32, so on a 64-bit
+        * architecture, XOR the two 32-bit halves together to produce slightly
+        * more entropy. The two zeroed bytes won't have any practical impact on
+        * this operation.
+        */
+       if (uss->estimating)
+       {
+               uint32 tmp;
+
+#if SIZEOF_DATUM == 8
+               tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
+#else                                                  /* SIZEOF_DATUM != 8 */
+               tmp = (uint32) res;
+#endif
+
+               addHyperLogLog(&uss->abbr_card, 
DatumGetUInt32(hash_uint32(tmp)));
+       }
+
+       /*
+        * Byteswap on little-endian machines.
+        *
+        * This is needed so that macaddr_cmp_abbrev() (an unsigned integer 
3-way
+        * comparator) works correctly on all platforms. Without this, the
+        * comparator would have to call memcmp() with a pair of pointers to the
+        * first byte of each abbreviated key, which is slower.
+        */
+       res = DatumBigEndianToNative(res);
+
+       return res;
+}
diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h
index 00320b4..b66c074 100644
--- a/src/include/catalog/pg_amproc.h
+++ b/src/include/catalog/pg_amproc.h
@@ -117,6 +117,7 @@ DATA(insert (       1976   20 23 1 2189 ));
 DATA(insert (  1976   20 21 1 2193 ));
 DATA(insert (  1982   1186 1186 1 1315 ));
 DATA(insert (  1984   829 829 1 836 ));
+DATA(insert (  1984   829 829 2 3401 ));
 DATA(insert (  1986   19 19 1 359 ));
 DATA(insert (  1986   19 19 2 3135 ));
 DATA(insert (  1988   1700 1700 1 1769 ));
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index af19c1a..2804cd6 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2106,6 +2106,8 @@ DATA(insert OID = 834 (  macaddr_ge                       
PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0
 DATA(insert OID = 835 (  macaddr_ne                    PGNSP PGUID 12 1 0 0 0 
f f f t t f i s 2 0 16 "829 829" _null_ _null_ _null_ _null_ _null_      
macaddr_ne _null_ _null_ _null_ ));
 DATA(insert OID = 836 (  macaddr_cmp           PGNSP PGUID 12 1 0 0 0 f f f f 
t f i s 2 0 23 "829 829" _null_ _null_ _null_ _null_ _null_      macaddr_cmp 
_null_ _null_ _null_ ));
 DESCR("less-equal-greater");
+DATA(insert OID = 3401 (  macaddr_sortsupport PGNSP PGUID 12 1 0 0 0 f f f f t 
f i s 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ macaddr_sortsupport 
_null_ _null_ _null_ ));
+DESCR("sort support");
 DATA(insert OID = 3144 (  macaddr_not          PGNSP PGUID 12 1 0 0 0 f f f f 
t f i s 1 0 829 "829" _null_ _null_ _null_ _null_ _null_ macaddr_not _null_ 
_null_ _null_ ));
 DATA(insert OID = 3145 (  macaddr_and          PGNSP PGUID 12 1 0 0 0 f f f f 
t f i s 2 0 829 "829 829" _null_ _null_ _null_ _null_ _null_ macaddr_and _null_ 
_null_ _null_ ));
 DATA(insert OID = 3146 (  macaddr_or           PGNSP PGUID 12 1 0 0 0 f f f f 
t f i s 2 0 829 "829 829" _null_ _null_ _null_ _null_ _null_ macaddr_or _null_ 
_null_ _null_ ));
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index a91be98..fc60b63 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -1018,6 +1018,7 @@ extern Datum macaddr_not(PG_FUNCTION_ARGS);
 extern Datum macaddr_and(PG_FUNCTION_ARGS);
 extern Datum macaddr_or(PG_FUNCTION_ARGS);
 extern Datum macaddr_trunc(PG_FUNCTION_ARGS);
+extern Datum macaddr_sortsupport(PG_FUNCTION_ARGS);
 extern Datum hashmacaddr(PG_FUNCTION_ARGS);
 
 /* numeric.c */
-- 
2.7.2

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to