Hi all, I'd like to propose the $subject.
Since commit ec8719ccbfcd made hex_decode_safe() SIMD-aware, decoding a run of hex digits is now fast. The attached patch reuses hex_decode_safe() in the UUID input function to speed up parsing. We accept several textual forms of a UUID[1]. The fast path handles the common ones: 32 hex digits, the canonical 8x-4x-4x-4x-12x form (where "nx" means n hex digits), and either of those wrapped in braces. Otherwise, it falls back to the ordinary scalar UUID parser. I've benchmarked the parse speed using the following queries: CREATE TEMP TABLE u AS SELECT gen_random_uuid()::text AS t FROM generate_series(1, 1000000); EXPLAIN (ANALYZE, TIMING OFF) SELECT t::uuid FROM u; I compared the execution time of the second query, which measures uuid_in() alone, with/without SIMD optimization. Here are the results (the median of 5 runs): HEAD: 208.879 ms Patched: 40.983 ms The improvements look promising to me. But in a realistic pipeline the parse is a small fraction of the work, so end-to-end gains could be much smaller. Feedback is very welcome. Regards, [1] https://www.postgresql.org/docs/devel/datatype-uuid.html#DATATYPE-UUID -- Masahiko Sawada Amazon Web Services: https://aws.amazon.com
From 72e59ea260aaf845cd856cbdf36a2502694c298a Mon Sep 17 00:00:00 2001 From: Masahiko Sawada <[email protected]> Date: Thu, 25 Jun 2026 10:03:44 -0700 Subject: [PATCH v1] Optimize UUID parse using SIMD. Author: Reviewed-by: Discussion: https://postgr.es/m/ --- src/backend/utils/adt/uuid.c | 92 ++++++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 5 deletions(-) diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c index 6ee3752ac78..08272527669 100644 --- a/src/backend/utils/adt/uuid.c +++ b/src/backend/utils/adt/uuid.c @@ -19,7 +19,9 @@ #include "common/hashfn.h" #include "lib/hyperloglog.h" #include "libpq/pqformat.h" +#include "nodes/miscnodes.h" #include "port/pg_bswap.h" +#include "utils/builtins.h" #include "utils/fmgrprotos.h" #include "utils/guc.h" #include "utils/skipsupport.h" @@ -122,13 +124,10 @@ uuid_out(PG_FUNCTION_ARGS) } /* - * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash - * after each group of 4 hexadecimal digits, and optionally surrounded by {}. - * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal - * digits, is the only one used for output.) + * General UUID parser. */ static void -string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext) +string_to_uuid_scalar(const char *source, pg_uuid_t *uuid, Node *escontext) { const char *src = source; bool braces = false; @@ -177,6 +176,89 @@ syntax_error: "uuid", source))); } +/* + * SIMD fast path. Handles the four common shapes directly and delegates everything + * else to string_to_uuid_scalar(). 
+ */ +#ifndef USE_NO_SIMD +static void +string_to_uuid_simd(const char *source, pg_uuid_t *uuid, Node *escontext) +{ + const char *body = source; + size_t len = strlen(source); + const char *hexsrc = NULL; + char hexbuf[32]; + ErrorSaveContext esctx = {T_ErrorSaveContext}; + + /* Strip one optional surrounding brace pair */ + if (len >= 2 && source[0] == '{' && source[len - 1] == '}') + { + body = source + 1; + len -= 2; + } + + if (len == 32) + { + /* + * Body is already 32 contiguous hex digits -- decode straight from + * the input. hex_decode_safe() reads exactly body[0..31], so it never + * touches the trailing NULL or '}'. + */ + hexsrc = body; + } + else if (len == 36 && + body[8] == '-' && body[13] == '-' && body[18] == '-' && + body[23] == '-') + { + /* + * The canonical format 8x-4x-4x-4x-12x format. Compact them into + * hexbuf with fixed-offset copies. + */ + memcpy(&hexbuf[0], &body[0], 8); + memcpy(&hexbuf[8], &body[9], 4); + memcpy(&hexbuf[12], &body[14], 4); + memcpy(&hexbuf[16], &body[19], 4); + memcpy(&hexbuf[20], &body[24], 12); + hexsrc = hexbuf; + } + + if (hexsrc == NULL) + { + /* Uncommon shape; let the general parse handle it */ + string_to_uuid_scalar(source, uuid, escontext); + return; + } + + /* + * Decode the UUID hex data using our hex decoder that is SIMD-aware. We + * pass the local esctx instead of escontext to hex_decode_safe() to + * shallow any raised by hex_decode_safe(), then fall back to the general + * UUID parser for the correct error. + */ + (void) hex_decode_safe(hexsrc, 32, (char *) uuid->data, (Node *) &esctx); + + if (esctx.error_occurred) + string_to_uuid_scalar(source, uuid, escontext); +} +#endif + +/* + * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash + * after each group of 4 hexadecimal digits, and optionally surrounded by {}. + * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal + * digits, is the only one used for output.) 
+ */
+static void
+string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext)
+{
+#ifndef USE_NO_SIMD
+	string_to_uuid_simd(source, uuid, escontext);
+#else
+	string_to_uuid_scalar(source, uuid, escontext);
+#endif
+}
+
+
 Datum
 uuid_recv(PG_FUNCTION_ARGS)
 {
-- 
2.54.0
