Hello, I've attached a patch to add SortSupport for Postgres' macaddr which has the effect of improving the performance of sorting operations for the type. The strategy that I employ is very similar to that for UUID, which is to create abbreviated keys by packing as many bytes from the MAC address as possible into Datums, and then performing fast unsigned integer comparisons while sorting.
I ran some informal local benchmarks, and for cardinality greater than 100k rows, I see a speed up on `CREATE INDEX` of roughly 25% to 65%. (For those interested, I put a few more numbers into a small report here [2].) Admittedly, this is not quite as useful as speeding up sorting on a more common data type like TEXT or UUID, but the change still seems like a useful performance improvement. I largely wrote it as an exercise to familiarize myself with the Postgres codebase. I'll add an entry into the current commitfest as suggested by the Postgres Wiki and follow up here with a link. Thanks, and if anyone has feedback or other thoughts, let me know! Brandur [1] https://www.postgresql.org/message-id/CAM3SWZR4avsTwwNVUzRNbHk8v36W-QBqpoKg=ogkwwy0dkt...@mail.gmail.com [2] https://docs.google.com/spreadsheets/d/1cUqbg9RTgSo16WrrfJJwDP1eDaWL3TNOxnIOFNpfwgA/edit?usp=sharing
>From 73f7dbf1921eea7106c93654dd7bf1fdd0888a07 Mon Sep 17 00:00:00 2001 From: Brandur <bran...@mutelight.org> Date: Tue, 16 Aug 2016 17:07:07 -0700 Subject: [PATCH] Implement SortSupport for macaddr data type Introduces a scheme to produce abbreviated keys for the macaddr type which involves packing as many of a MAC address' six bytes as possible into a Datum (and adding two zeroed bytes of padding on a 64-bit system). Sorting routines can then take advantage of the abbreviated keys by performing fast unsigned integer comparisons, resulting in a significant performance increase when operating on > 100k rows or more. This implementation is modeled closely on the addition of SortSupport for the UUID data type, which employs a very similar strategy. --- src/backend/utils/adt/mac.c | 210 ++++++++++++++++++++++++++++++++++++++++ src/include/catalog/pg_amproc.h | 1 + src/include/catalog/pg_proc.h | 2 + src/include/utils/builtins.h | 1 + 4 files changed, 214 insertions(+) diff --git a/src/backend/utils/adt/mac.c b/src/backend/utils/adt/mac.c index 509315a..471c3f0 100644 --- a/src/backend/utils/adt/mac.c +++ b/src/backend/utils/adt/mac.c @@ -7,9 +7,13 @@ #include "postgres.h" #include "access/hash.h" +#include "lib/hyperloglog.h" #include "libpq/pqformat.h" +#include "port/pg_bswap.h" #include "utils/builtins.h" +#include "utils/guc.h" #include "utils/inet.h" +#include "utils/sortsupport.h" /* @@ -22,6 +26,21 @@ #define lobits(addr) \ ((unsigned long)(((addr)->d<<16)|((addr)->e<<8)|((addr)->f))) +/* sortsupport for macaddr */ +typedef struct +{ + int64 input_count; /* number of non-null values seen */ + bool estimating; /* true if estimating cardinality */ + + hyperLogLogState abbr_card; /* cardinality estimator */ +} macaddr_sortsupport_state; + +static int macaddr_cmp_internal(macaddr *a1, macaddr *a2); +static int macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup); +static int macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup); +static bool macaddr_abbrev_abort(int memtupcount, SortSupport ssup); +static Datum macaddr_abbrev_convert(Datum original, SortSupport ssup); + /* * MAC address reader. Accepts several common notations. */ @@ -318,3 +337,194 @@ macaddr_trunc(PG_FUNCTION_ARGS) PG_RETURN_MACADDR_P(result); } + +/* + * SortSupport strategy function. Populates a SortSupport struct with the + * information necessary to use comparison by abbreviated keys. + */ +Datum +macaddr_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + + ssup->comparator = macaddr_fast_cmp; + ssup->ssup_extra = NULL; + + if (ssup->abbreviate) + { + macaddr_sortsupport_state *uss; + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + uss = palloc(sizeof(macaddr_sortsupport_state)); + uss->input_count = 0; + uss->estimating = true; + initHyperLogLog(&uss->abbr_card, 10); + + ssup->ssup_extra = uss; + + ssup->comparator = macaddr_cmp_abbrev; + ssup->abbrev_converter = macaddr_abbrev_convert; + ssup->abbrev_abort = macaddr_abbrev_abort; + ssup->abbrev_full_comparator = macaddr_fast_cmp; + + MemoryContextSwitchTo(oldcontext); + } + + PG_RETURN_VOID(); +} + +/* + * SortSupport "traditional" comparison function. Pulls two MAC addresses from + * the heap and runs a standard comparison on them. + */ +static int +macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup) +{ + macaddr *arg1 = DatumGetMacaddrP(x); + macaddr *arg2 = DatumGetMacaddrP(y); + + return macaddr_cmp_internal(arg1, arg2); +} + +/* + * SortSupport abbreviated key comparison function. Compares two MAC addresses + * quickly by treating them like integers, and without having to go to the + * heap. + */ +static int +macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup) +{ + if (x > y) + return 1; + else if (x == y) + return 0; + else + return -1; +} + +/* + * Callback for estimating effectiveness of abbreviated key optimization. + * + * We pay no attention to the cardinality of the non-abbreviated data, because + * there is no equality fast-path within the standard macaddr comparator. + */ +static bool +macaddr_abbrev_abort(int memtupcount, SortSupport ssup) +{ + macaddr_sortsupport_state *uss = ssup->ssup_extra; + double abbr_card; + + if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating) + return false; + + abbr_card = estimateHyperLogLog(&uss->abbr_card); + + /* + * If we have >100k distinct values, then even if we were sorting many + * billion rows we'd likely still break even, and the penalty of undoing + * that many rows of abbrevs would probably not be worth it. At this point + * we stop counting because we know that we're now fully committed. + */ + if (abbr_card > 100000.0) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "macaddr_abbrev: estimation ends at cardinality %f" + " after " INT64_FORMAT " values (%d rows)", + abbr_card, uss->input_count, memtupcount); +#endif + uss->estimating = false; + return false; + } + + /* + * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row + * fudge factor allows us to abort earlier on genuinely pathological data + * where we've had exactly one abbreviated value in the first 2k + * (non-null) rows. + */ + if (abbr_card < uss->input_count / 2000.0 + 0.5) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "macaddr_abbrev: aborting abbreviation at cardinality %f" + " below threshold %f after " INT64_FORMAT " values (%d rows)", + abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count, + memtupcount); +#endif + return true; + } + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "macaddr_abbrev: cardinality %f after " INT64_FORMAT + " values (%d rows)", abbr_card, uss->input_count, memtupcount); +#endif + + return false; +} + +/* + * SortSupport converstion routine. Converts original macaddr representation + * to abbreviated key representation. + * + * Packs as many bytes from a 6-byte MAC addresses into a Datum and treats it + * as an unsigned integer for purposes of comparison. On a 64-bit machine, + * there will be two zeroed bytes of padding. + */ +static Datum +macaddr_abbrev_convert(Datum original, SortSupport ssup) +{ + macaddr_sortsupport_state *uss = ssup->ssup_extra; + macaddr *authoritative = DatumGetMacaddrP(original); + Datum res; + + /* + * On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of + * the MAC address in. There will be two bytes of zero padding on the least + * significant end. + */ +#if SIZEOF_DATUM == 8 + memset(&res, 0, SIZEOF_DATUM); + memcpy(&res, authoritative, sizeof(macaddr)); +#else /* SIZEOF_DATUM != 8 */ + memcpy(&res, authoritative, SIZEOF_DATUM); +#endif + uss->input_count += 1; + + /* + * Cardinality estimation. The estimate uses uint32, so on a 64-bit + * architecture, XOR the two 32-bit halves together to produce slightly + * more entropy. The two zeroed bytes won't have any practical impact on + * this operation. + */ + if (uss->estimating) + { + uint32 tmp; + +#if SIZEOF_DATUM == 8 + tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); +#else /* SIZEOF_DATUM != 8 */ + tmp = (uint32) res; +#endif + + addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); + } + + /* + * Byteswap on little-endian machines. + * + * This is needed so that macaddr_cmp_abbrev() (an unsigned integer 3-way + * comparator) works correctly on all platforms. Without this, the + * comparator would have to call memcmp() with a pair of pointers to the + * first byte of each abbreviated key, which is slower. + */ + res = DatumBigEndianToNative(res); + + return res; +} diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h index 00320b4..b66c074 100644 --- a/src/include/catalog/pg_amproc.h +++ b/src/include/catalog/pg_amproc.h @@ -117,6 +117,7 @@ DATA(insert ( 1976 20 23 1 2189 )); DATA(insert ( 1976 20 21 1 2193 )); DATA(insert ( 1982 1186 1186 1 1315 )); DATA(insert ( 1984 829 829 1 836 )); +DATA(insert ( 1984 829 829 2 3401 )); DATA(insert ( 1986 19 19 1 359 )); DATA(insert ( 1986 19 19 2 3135 )); DATA(insert ( 1988 1700 1700 1 1769 )); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index af19c1a..2804cd6 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -2106,6 +2106,8 @@ DATA(insert OID = 834 ( macaddr_ge PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 DATA(insert OID = 835 ( macaddr_ne PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "829 829" _null_ _null_ _null_ _null_ _null_ macaddr_ne _null_ _null_ _null_ )); DATA(insert OID = 836 ( macaddr_cmp PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 23 "829 829" _null_ _null_ _null_ _null_ _null_ macaddr_cmp _null_ _null_ _null_ )); DESCR("less-equal-greater"); +DATA(insert OID = 3401 ( macaddr_sortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ macaddr_sortsupport _null_ _null_ _null_ )); +DESCR("sort support"); DATA(insert OID = 3144 ( macaddr_not PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 829 "829" _null_ _null_ _null_ _null_ _null_ macaddr_not _null_ _null_ _null_ )); DATA(insert OID = 3145 ( macaddr_and PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 829 "829 829" _null_ _null_ _null_ _null_ _null_ macaddr_and _null_ _null_ _null_ )); DATA(insert OID = 3146 ( macaddr_or PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 829 "829 829" _null_ _null_ _null_ _null_ _null_ macaddr_or _null_ _null_ _null_ )); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index a91be98..fc60b63 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -1018,6 +1018,7 @@ extern Datum macaddr_not(PG_FUNCTION_ARGS); extern Datum macaddr_and(PG_FUNCTION_ARGS); extern Datum macaddr_or(PG_FUNCTION_ARGS); extern Datum macaddr_trunc(PG_FUNCTION_ARGS); +extern Datum macaddr_sortsupport(PG_FUNCTION_ARGS); extern Datum hashmacaddr(PG_FUNCTION_ARGS); /* numeric.c */ -- 2.7.2
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers