Folks, please find attached a couple of patches intended to do what the subject line of this message says.
This patch set cut the time to copy ten million rows, each holding 10 randomly sized int8 values, by about a third, so at least for that case, it's pretty decent. Thanks to Andrew Gierth for lots of patient help. Best, David. -- David Fetter <david(at)fetter(dot)org> http://fetter.org/ Phone: +1 415 235 3778 Remember to vote! Consider donating to Postgres: http://www.postgresql.org/about/donate
>From 6e8136ece5b01ca9cd16bdb974c4d54e939c92cf Mon Sep 17 00:00:00 2001 From: David Fetter <da...@fetter.org> Date: Tue, 10 Sep 2019 02:06:31 -0700 Subject: [PATCH v1 1/2] Output digits two at a time in sprintf.c To: hackers MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------2.21.0" This is a multi-part message in MIME format. --------------2.21.0 Content-Type: text/plain; charset=UTF-8; format=fixed Content-Transfer-Encoding: 8bit diff --git a/src/port/snprintf.c b/src/port/snprintf.c index 8fd997553e..fd9d384144 100644 --- a/src/port/snprintf.c +++ b/src/port/snprintf.c @@ -1014,9 +1014,60 @@ fmtint(long long value, char type, int forcesign, int leftjust, PrintfTarget *target) { unsigned long long base; + unsigned long long square; unsigned long long uvalue; int dosign; - const char *cvt = "0123456789abcdef"; + /* Maps for octal, decimal, and two flavors of hexadecimal */ + const char *digits; + const char decimal_digits[200] = + /* 10^2 * 2 decimal digits */ + "0001020304050607080910111213141516171819" + "2021222324252627282930313233343536373839" + "4041424344454647484950515253545556575859" + "6061626364656667686970717273747576777879" + "8081828384858687888990919293949596979899"; + const char octal_digits[128] = + /* 8^2 * 2 octal digits */ + "00010203040506071011121314151617" + "20212223242526273031323334353637" + "40414243444546475051525354555657" + "60616263646566677071727374757677"; + /* 16^2 * 2 hex digits */ + const char hex_lower_digits[512] = + "000102030405060708090a0b0c0d0e0f" + "101112131415161718191a1b1c1d1e1f" + "202122232425262728292a2b2c2d2e2f" + "303132333435363738393a3b3c3d3e3f" + "404142434445464748494a4b4c4d4e4f" + "505152535455565758595a5b5c5d5e5f" + "606162636465666768696a6b6c6d6e6f" + "707172737475767778797a7b7c7d7e7f" + "808182838485868788898a8b8c8d8e8f" + "909192939495969798999a9b9c9d9e9f" + "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" + "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" + 
"d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" + "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" + "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; + const char hex_upper_digits[512] = + /* 16^2 * 2 HEX DIGITS */ + "000102030405060708090A0B0C0D0E0F" + "101112131415161718191A1B1C1D1E1F" + "202122232425262728292A2B2C2D2E2F" + "303132333435363738393A3B3C3D3E3F" + "404142434445464748494A4B4C4D4E4F" + "505152535455565758595A5B5C5D5E5F" + "606162636465666768696A6B6C6D6E6F" + "707172737475767778797A7B7C7D7E7F" + "808182838485868788898A8B8C8D8E8F" + "909192939495969798999A9B9C9D9E9F" + "A0A1A2A3A4A5A6A7A8A9AAABACADAEAF" + "B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF" + "C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF" + "D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF" + "E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF" + "F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF"; int signvalue = 0; char convert[64]; int vallen = 0; @@ -1027,23 +1078,27 @@ fmtint(long long value, char type, int forcesign, int leftjust, { case 'd': case 'i': + digits = decimal_digits; base = 10; dosign = 1; break; case 'o': + digits = octal_digits; base = 8; dosign = 0; break; case 'u': + digits = decimal_digits; base = 10; dosign = 0; break; case 'x': + digits = hex_lower_digits; base = 16; dosign = 0; break; case 'X': - cvt = "0123456789ABCDEF"; + digits = hex_upper_digits; base = 16; dosign = 0; break; @@ -1051,6 +1106,8 @@ fmtint(long long value, char type, int forcesign, int leftjust, return; /* keep compiler quiet */ } + square = base * base; + /* disable MSVC warning about applying unary minus to an unsigned value */ #if _MSC_VER #pragma warning(push) @@ -1073,12 +1130,20 @@ fmtint(long long value, char type, int forcesign, int leftjust, vallen = 0; else { - /* make integer string */ - do + /* make integer string, two digits at a time */ + while(uvalue >= base) { - convert[sizeof(convert) - (++vallen)] = cvt[uvalue % base]; - uvalue = uvalue / base; - } while (uvalue); + const int i = (uvalue % square) * 2; + uvalue /= square; + vallen += 2; + memcpy(convert + sizeof(convert) - vallen, digits + i, 
2); + } + /* Account for single digit */ + if (uvalue > 0 || vallen == 0) + { + vallen++; + memcpy(convert + sizeof(convert) - vallen, digits + uvalue * 2 + 1, 1); + } } zeropad = Max(0, precision - vallen); --------------2.21.0--
>From f4a3729900484292cce5066be6a6f183f489ae8c Mon Sep 17 00:00:00 2001 From: David Fetter <da...@fetter.org> Date: Sun, 15 Sep 2019 00:06:29 -0700 Subject: [PATCH v1 2/2] Made int8 operations more efficent To: hackers MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------2.21.0" This is a multi-part message in MIME format. --------------2.21.0 Content-Type: text/plain; charset=UTF-8; format=fixed Content-Transfer-Encoding: 8bit - Output routines now do more digits per iteration, and - Code determines the number of decimal digits in an int8 efficiently diff --git a/src/backend/access/common/printsimple.c b/src/backend/access/common/printsimple.c index 651ade14dd..17ca533b87 100644 --- a/src/backend/access/common/printsimple.c +++ b/src/backend/access/common/printsimple.c @@ -112,7 +112,7 @@ printsimple(TupleTableSlot *slot, DestReceiver *self) case INT8OID: { int64 num = DatumGetInt64(value); - char str[23]; /* sign, 21 digits and '\0' */ + char str[MAXINT8LEN]; pg_lltoa(num, str); pq_sendcountedtext(&buf, str, strlen(str), false); diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 580043233b..3818dbaa85 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -39,6 +39,8 @@ jsonpath_scan.c: FLEX_NO_BACKUP=yes # jsonpath_scan is compiled as part of jsonpath_gram jsonpath_gram.o: jsonpath_scan.c +numutils.o: CFLAGS += $(PERMIT_DECLARATION_AFTER_STATEMENT) + # jsonpath_gram.c and jsonpath_scan.c are in the distribution tarball, # so they are not cleaned here. 
clean distclean maintainer-clean: diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c index 0ff9394a2f..6230807906 100644 --- a/src/backend/utils/adt/int8.c +++ b/src/backend/utils/adt/int8.c @@ -27,8 +27,6 @@ #include "utils/builtins.h" -#define MAXINT8LEN 25 - typedef struct { int64 current; diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index 70138feb29..f75faa9255 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -20,6 +20,44 @@ #include "common/int.h" #include "utils/builtins.h" +#include "port/pg_bitutils.h" + +/* + * A table of all two-digit numbers. This is used to speed up decimal digit + * generation by copying pairs of digits into the final output. + */ +static const char DIGIT_TABLE[200] = { + '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', + '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', + '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', + '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', + '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', + '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', + '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', + '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', + '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', + '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9' +}; + +/* + * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 + */ +static inline uint32 +decimalLength(const uint64 v) +{ + uint32 t; + static 
uint64 PowersOfTen[] = + {1, 10, 100, + 1000, 10000, 100000, + 1000000, 10000000, 100000000, + 1000000000, 10000000000, 100000000000, + 1000000000000, 10000000000000, 100000000000000, + 1000000000000000, 10000000000000000, 100000000000000000, + 1000000000000000000}; + + t = (pg_leftmost_one_pos64(v) + 1)*1233/4096; + return t + (v >= PowersOfTen[t]); +} /* * pg_atoi: convert string to integer @@ -333,13 +371,13 @@ pg_ltoa(int32 value, char *a) * pg_lltoa: convert a signed 64-bit integer to its string representation * * Caller must ensure that 'a' points to enough memory to hold the result - * (at least MAXINT8LEN+1 bytes, counting a leading sign and trailing NUL). + * (at least MAXINT8LEN bytes, counting a leading sign and trailing NUL). */ void pg_lltoa(int64 value, char *a) { - char *start = a; - bool neg = false; + uint32 olength; + uint32 i = 0; /* * Avoid problems with the most negative integer not being representable @@ -350,37 +388,83 @@ pg_lltoa(int64 value, char *a) memcpy(a, "-9223372036854775808", 21); return; } - else if (value < 0) + + /* Might as well handle this case, too */ + if (value == 0) + { + memcpy(a, "0", 2); + return; + } + + if (value < 0) { value = -value; - neg = true; - } - - /* Compute the result string backwards. */ - do - { - int64 remainder; - int64 oldval = value; - - value /= 10; - remainder = oldval - value * 10; - *a++ = '0' + remainder; - } while (value != 0); - - if (neg) *a++ = '-'; + } + + olength = decimalLength(value); + + /* Compute the result string. */ + while (value >= 100000000) + { + /* Expensive 64-bit division. Optimize? 
*/ + const uint64 q = value / 100000000; + uint32 value2 = (uint32) (value - 100000000 * q); + + value = q; + + const uint32 c = value2 % 10000; + const uint32 d = value2 / 10000; + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; + const uint32 d0 = (d % 100) << 1; + const uint32 d1 = (d / 100) << 1; + + value = q; + + memcpy(a + olength - i - 2, DIGIT_TABLE + c0, 2); + memcpy(a + olength - i - 4, DIGIT_TABLE + c1, 2); + memcpy(a + olength - i - 6, DIGIT_TABLE + d0, 2); + memcpy(a + olength - i - 8, DIGIT_TABLE + d1, 2); + i += 8; + } + + uint32 value2 = (uint32) value; - /* Add trailing NUL byte, and back up 'a' to the last character. */ - *a-- = '\0'; + while (value2 >= 10000) + { + const uint32 c = value2 - 10000 * (value2 / 10000); + + value2 /= 10000; + + const uint32 c0 = (c % 100) << 1; + const uint32 c1 = (c / 100) << 1; + + memcpy(a + olength - i - 2, DIGIT_TABLE + c0, 2); + memcpy(a + olength - i - 4, DIGIT_TABLE + c1, 2); + i += 4; + } + if (value2 >= 100) + { + const uint32 c = (value2 % 100) << 1; - /* Reverse string. */ - while (start < a) + value2 /= 100; + memcpy(a + olength - i - 2, DIGIT_TABLE + c, 2); + i += 2; + } + if (value2 >= 10) { - char swap = *start; + const uint32 c = value2 << 1; - *start++ = *a; - *a-- = swap; + memcpy(a + olength - i - 2, DIGIT_TABLE + c, 2); + i += 2; } + else + { + *a = (char) ('0' + value2); + } + + a[olength] = '\0'; } diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 937ddb7ef0..9e8392741e 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -18,6 +18,7 @@ #include "nodes/nodes.h" #include "utils/fmgrprotos.h" +#define MAXINT8LEN 21 /* bool.c */ extern bool parse_bool(const char *value, bool *result); --------------2.21.0--