On Sat, Jan 11, 2025 at 3:46 AM Nathan Bossart <nathandboss...@gmail.com> wrote:
>
> I was able to get auto-vectorization to take effect on Apple clang 16 with
> the following addition to src/backend/utils/adt/Makefile:
>
> encode.o: CFLAGS += ${CFLAGS_VECTORIZE} -mllvm -force-vector-width=8
>
> This gave the following results with your hex_encode_test() function:
>
>   buf  | HEAD  | patch | % diff
> -------+-------+-------+--------
>     16 |    21 |    16 |     24
>     64 |    54 |    41 |     24
>    256 |   138 |   100 |     28
>   1024 |   441 |   300 |     32
>   4096 |  1671 |  1106 |     34
>  16384 |  6890 |  4570 |     34
>  65536 | 27393 | 18054 |     34

We can do about as well simply by changing the nibble lookup to a byte
lookup, which works on every compiler and architecture:

select hex_encode_test(1000000, 1024);
master:
Time: 1158.700 ms
v2:
Time: 777.443 ms

If we need to do much better than this, it seems better to send the
data to the client as binary, if possible.

--
John Naylor
Amazon Web Services

diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index 4a6fcb56cd..8b059bc834 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -145,7 +145,7 @@ binary_decode(PG_FUNCTION_ARGS)
  * HEX
  */
 
-static const char hextbl[] = "0123456789abcdef";
+static const char hextbl[512] = "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
 
 static const int8 hexlookup[128] = {
 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
@@ -165,9 +165,8 @@ hex_encode(const char *src, size_t len, char *dst)
 
 	while (src < end)
 	{
-		*dst++ = hextbl[(*src >> 4) & 0xF];
-		*dst++ = hextbl[*src & 0xF];
-		src++;
+		memcpy(dst, &hextbl[(* ((unsigned char *) src)) * 2], 2);
+		src++; dst+=2;
 	}
 	return (uint64) len * 2;
 }