From c4e8860b799e43830b3e1eda17ab112b00550365 Mon Sep 17 00:00:00 2001
From: "Andrey M. Borodin" <x4mmm@night.local>
Date: Wed, 20 Mar 2024 22:30:14 +0500
Subject: [PATCH v26] Implement UUID v7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds function for UUID generation. Most important function here
is uuidv7() which generates new UUID according to the new standard.
For code readability this commit adds alias uuidv4() to function
gen_random_uuid().

Author: Andrey Borodin
Reviewed-by: Sergey Prokhorenko, Kirk Wolak, Przemysław Sztoch
Reviewed-by: Nikolay Samokhvalov, Jelte Fennema-Nio, Aleksander Alekseev
Reviewed-by: Peter Eisentraut, Chris Travers, Lukas Fittl
Discussion: https://postgr.es/m/CAAhFRxitJv%3DyoGnXUgeLB_O%2BM7J2BJAmb5jqAT9gZ3bij3uLDA%40mail.gmail.com
---
 doc/src/sgml/func.sgml                   |  19 ++-
 src/backend/utils/adt/uuid.c             | 174 ++++++++++++++++++++++-
 src/include/catalog/pg_proc.dat          |  11 +-
 src/test/regress/expected/opr_sanity.out |  10 +-
 src/test/regress/expected/uuid.out       |  46 +++++-
 src/test/regress/sql/uuid.sql            |  18 ++-
 6 files changed, 263 insertions(+), 15 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 785886af71..6202ce2171 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -14160,6 +14160,14 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
    <primary>gen_random_uuid</primary>
   </indexterm>
 
+  <indexterm>
+   <primary>uuidv4</primary>
+  </indexterm>
+
+  <indexterm>
+   <primary>uuidv7</primary>
+  </indexterm>
+
   <indexterm>
    <primary>uuid_extract_timestamp</primary>
   </indexterm>
@@ -14169,12 +14177,17 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
   </indexterm>
 
   <para>
-   <productname>PostgreSQL</productname> includes one function to generate a UUID:
+   <productname>PostgreSQL</productname> includes several functions to generate a UUID.
 <synopsis>
 <function>gen_random_uuid</function> () <returnvalue>uuid</returnvalue>
+<function>uuidv4</function> () <returnvalue>uuid</returnvalue>
+</synopsis>
+   These functions return a version 4 (random) UUID.
+<synopsis>
+<function>uuidv7</function> () <returnvalue>uuid</returnvalue>
 </synopsis>
-   This function returns a version 4 (random) UUID.  This is the most commonly
-   used type of UUID and is appropriate for most applications.
+   This function returns a version 7 UUID (UNIX timestamp with 1ms precision +
+   randomly seeded counter + random).
   </para>
 
   <para>
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 45eb1b2fea..470c6fb6b4 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -13,6 +13,8 @@
 
 #include "postgres.h"
 
+#include <sys/time.h>
+
 #include "common/hashfn.h"
 #include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
@@ -32,6 +34,7 @@ typedef struct
 	hyperLogLogState abbr_card; /* cardinality estimator */
 } uuid_sortsupport_state;
 
+static uint64 get_real_time_ns();
 static void string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext);
 static int	uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
 static int	uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
@@ -407,6 +410,12 @@ uuid_hash_extended(PG_FUNCTION_ARGS)
 	return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
 }
 
+/*
+ * Generate UUID version 4.
+ *
+ * All UUID bytes are filled with strong random numbers except version and
+ * variant 0b10 bits.
+ */
 Datum
 gen_random_uuid(PG_FUNCTION_ARGS)
 {
@@ -427,12 +436,132 @@ gen_random_uuid(PG_FUNCTION_ARGS)
 	PG_RETURN_UUID_P(uuid);
 }
 
-#define UUIDV1_EPOCH_JDATE  2299161 /* == date2j(1582,10,15) */
+
+#ifndef WIN32
+#include <time.h>
+
+static uint64 get_real_time_ns()
+{
+	struct timespec tmp;
+
+	clock_gettime(CLOCK_REALTIME, &tmp);
+	return tmp.tv_sec * 1000000000L + tmp.tv_nsec;
+}
+#else /* WIN32 */
+
+#include "c.h"
+#include <sysinfoapi.h>
+#include <sys/time.h>
+
+/* FILETIME of Jan 1 1970 00:00:00, the PostgreSQL epoch */
+static const unsigned __int64 epoch = UINT64CONST(116444736000000000);
+
+/*
+ * FILETIME represents the number of 100-nanosecond intervals since
+ * January 1, 1601 (UTC).
+ */
+#define FILETIME_UNITS_TO_NS UINT64CONST(100)
+
+
+/*
+ * timezone information is stored outside the kernel so tzp isn't used anymore.
+ *
+ * Note: this function is not for Win32 high precision timing purposes. See
+ * elapsed_time().
+ */
+static uint64
+get_real_time_ns()
+{
+	FILETIME	file_time;
+	ULARGE_INTEGER ularge;
+
+	GetSystemTimePreciseAsFileTime(&file_time);
+	ularge.LowPart = file_time.dwLowDateTime;
+	ularge.HighPart = file_time.dwHighDateTime;
+
+	return (ularge.QuadPart - epoch) * FILETIME_UNITS_TO_NS;
+}
+#endif
+
+/*
+ * Generate UUID version 7 per RFC 9562.
+ *
+ * Monotonicity (regarding generation on given backend) is ensured with method
+ * "Replace Leftmost Random Bits with Increased Clock Precision (Method 3)"
+ * We use 12 bits in "rand_a" bits to store 1/4096 fractions of millisecond.
+ * Usage of pg_testtime indicates that such precision is avaiable on most
+ * systems. If timestamp is not advancing between two consecutive UUID
+ * generations, previous timestamp is incremented and used instead of current
+ * timestamp.
+ */
+Datum
+uuidv7(PG_FUNCTION_ARGS)
+{
+	static uint64 previous_ns = 0;
+
+	pg_uuid_t	*uuid = palloc(UUID_LEN);
+	uint64		 ns;
+	uint64		 unix_ts_ms;
+	uint16 		 incresed_clock_precision;
+
+	ns = get_real_time_ns();
+	if (previous_ns >= ns)
+		ns++;
+	previous_ns = ns;
+
+	if (PG_NARGS() > 0)
+	{
+		Interval *span;
+		TimestampTz ts = (TimestampTz) (ns / 1000) -
+			(POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+		span = PG_GETARG_INTERVAL_P(0);
+		ts = DatumGetTimestampTz(DirectFunctionCall2(timestamptz_pl_interval,
+													 TimestampTzGetDatum(ts),
+													 IntervalPGetDatum(span)));
+		ns = (ts + (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC)
+			* 1000 + ns % 1000;
+	}
+	
+	unix_ts_ms = ns / 1000000;
+
+	/* Fill in time part */
+	uuid->data[0] = (unsigned char) (unix_ts_ms >> 40);
+	uuid->data[1] = (unsigned char) (unix_ts_ms >> 32);
+	uuid->data[2] = (unsigned char) (unix_ts_ms >> 24);
+	uuid->data[3] = (unsigned char) (unix_ts_ms >> 16);
+	uuid->data[4] = (unsigned char) (unix_ts_ms >> 8);
+	uuid->data[5] = (unsigned char) unix_ts_ms;
+
+	/* sub-millisecond timestamp fraction (12 bits) */
+	incresed_clock_precision = ((ns % 1000000) * 4096) / 1000000;
+
+	uuid->data[6] = (unsigned char) (incresed_clock_precision >> 8);
+	uuid->data[7] = (unsigned char) (incresed_clock_precision);
+
+	/* fill everything after the increased clock precision with random bytes */
+	if (!pg_strong_random(&uuid->data[6], UUID_LEN - 6))
+		ereport(ERROR,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 errmsg("could not generate random values")));
+
+	/*
+	 * Set magic numbers for a "version 7" (pseudorandom) UUID, see
+	 * https://www.rfc-editor.org/rfc/rfc9562#name-version-field
+	 */
+	/* set version field, top four bits are 0, 1, 1, 1 */
+	uuid->data[6] = (uuid->data[6] & 0x0f) | 0x70;
+	/* set variant field, top two bits are 1, 0 */
+	uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80;
+
+	PG_RETURN_UUID_P(uuid);
+}
+
+#define GREGORIAN_EPOCH_JDATE  2299161 /* == date2j(1582,10,15) */
 
 /*
  * Extract timestamp from UUID.
  *
- * Returns null if not RFC 4122 variant or not a version that has a timestamp.
+ * Returns null if not RFC 9562 variant or not a version that has a timestamp.
  */
 Datum
 uuid_extract_timestamp(PG_FUNCTION_ARGS)
@@ -442,7 +571,7 @@ uuid_extract_timestamp(PG_FUNCTION_ARGS)
 	uint64		tms;
 	TimestampTz ts;
 
-	/* check if RFC 4122 variant */
+	/* check if RFC 9562 variant */
 	if ((uuid->data[8] & 0xc0) != 0x80)
 		PG_RETURN_NULL();
 
@@ -461,7 +590,40 @@ uuid_extract_timestamp(PG_FUNCTION_ARGS)
 
 		/* convert 100-ns intervals to us, then adjust */
 		ts = (TimestampTz) (tms / 10) -
-			((uint64) POSTGRES_EPOCH_JDATE - UUIDV1_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+			((uint64) POSTGRES_EPOCH_JDATE - GREGORIAN_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+		PG_RETURN_TIMESTAMPTZ(ts);
+	}
+
+	if (version == 6)
+	{
+		tms = ((uint64) uuid->data[0]) << 52;
+		tms += ((uint64) uuid->data[1]) << 44;
+		tms += ((uint64) uuid->data[2]) << 36;
+		tms += ((uint64) uuid->data[3]) << 28;
+		tms += ((uint64) uuid->data[4]) << 20;
+		tms += ((uint64) uuid->data[5]) << 12;
+		tms += (((uint64) uuid->data[6]) & 0xf) << 8;
+		tms += ((uint64) uuid->data[7]);
+
+		/* convert 100-ns intervals to us, then adjust */
+		ts = (TimestampTz) (tms / 10) -
+			((uint64) POSTGRES_EPOCH_JDATE - GREGORIAN_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+
+		PG_RETURN_TIMESTAMPTZ(ts);
+	}
+
+	if (version == 7)
+	{
+		tms = uuid->data[5];
+		tms += ((uint64) uuid->data[4]) << 8;
+		tms += ((uint64) uuid->data[3]) << 16;
+		tms += ((uint64) uuid->data[2]) << 24;
+		tms += ((uint64) uuid->data[1]) << 32;
+		tms += ((uint64) uuid->data[0]) << 40;
+
+		/* convert ms to us, then adjust */
+		ts = (TimestampTz) (tms * 1000) -
+			(POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
 
 		PG_RETURN_TIMESTAMPTZ(ts);
 	}
@@ -473,7 +635,7 @@ uuid_extract_timestamp(PG_FUNCTION_ARGS)
 /*
  * Extract UUID version.
  *
- * Returns null if not RFC 4122 variant.
+ * Returns null if not RFC 9562 variant.
  */
 Datum
 uuid_extract_version(PG_FUNCTION_ARGS)
@@ -481,7 +643,7 @@ uuid_extract_version(PG_FUNCTION_ARGS)
 	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
 	uint16		version;
 
-	/* check if RFC 4122 variant */
+	/* check if RFC 9562 variant */
 	if ((uuid->data[8] & 0xc0) != 0x80)
 		PG_RETURN_NULL();
 
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 73d9cf8582..568f6833c0 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9207,11 +9207,20 @@
 { oid => '3432', descr => 'generate random UUID',
   proname => 'gen_random_uuid', proleakproof => 't', provolatile => 'v',
   prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' },
+{ oid => '9895', descr => 'generate UUID version 4',
+  proname => 'uuidv4', proleakproof => 't', provolatile => 'v',
+  prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' },
+{ oid => '9896', descr => 'generate UUID version 7',
+  proname => 'uuidv7', proleakproof => 't', provolatile => 'v',
+  prorettype => 'uuid', proargtypes => '', prosrc => 'uuidv7' },
+{ oid => '9897', descr => 'generate UUID version 7',
+  proname => 'uuidv7', proleakproof => 't', provolatile => 'v',
+  prorettype => 'uuid', proargtypes => 'interval', prosrc => 'uuidv7' },
 { oid => '6342', descr => 'extract timestamp from UUID',
   proname => 'uuid_extract_timestamp', proleakproof => 't',
   prorettype => 'timestamptz', proargtypes => 'uuid',
   prosrc => 'uuid_extract_timestamp' },
-{ oid => '6343', descr => 'extract version from RFC 4122 UUID',
+{ oid => '6343', descr => 'extract version from RFC 9562 UUID',
   proname => 'uuid_extract_version', proleakproof => 't', prorettype => 'int2',
   proargtypes => 'uuid', prosrc => 'uuid_extract_version' },
 
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 9d047b21b8..7b308d4333 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -126,9 +126,10 @@ WHERE p1.oid < p2.oid AND
      p1.proretset != p2.proretset OR
      p1.provolatile != p2.provolatile OR
      p1.pronargs != p2.pronargs);
- oid | proname | oid | proname 
------+---------+-----+---------
-(0 rows)
+ oid  | proname | oid  | proname 
+------+---------+------+---------
+ 9896 | uuidv7  | 9897 | uuidv7
+(1 row)
 
 -- Look for uses of different type OIDs in the argument/result type fields
 -- for different aliases of the same built-in function.
@@ -874,6 +875,9 @@ xid8ne(xid8,xid8)
 xid8cmp(xid8,xid8)
 uuid_extract_timestamp(uuid)
 uuid_extract_version(uuid)
+uuidv4()
+uuidv7()
+uuidv7(interval)
 -- restore normal output mode
 \a\t
 -- List of functions used by libpq's fe-lobj.c
diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out
index 6026e15ed3..7c39a25224 100644
--- a/src/test/regress/expected/uuid.out
+++ b/src/test/regress/expected/uuid.out
@@ -168,6 +168,26 @@ SELECT count(DISTINCT guid_field) FROM guid1;
      2
 (1 row)
 
+-- test of uuidv4() alias
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+SELECT count(DISTINCT guid_field) FROM guid1;
+ count 
+-------
+     2
+(1 row)
+
+-- generation test for v7
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+SELECT count(DISTINCT guid_field) FROM guid1;
+ count 
+-------
+     2
+(1 row)
+
 -- extract functions
 -- version
 SELECT uuid_extract_version('11111111-1111-5111-8111-111111111111');  -- 5
@@ -188,8 +208,32 @@ SELECT uuid_extract_version('11111111-1111-1111-1111-111111111111');  -- null
                      
 (1 row)
 
+SELECT uuid_extract_version(uuidv4()); --4
+ uuid_extract_version 
+----------------------
+                    4
+(1 row)
+
+SELECT uuid_extract_version(uuidv7()); --7
+ uuid_extract_version 
+----------------------
+                    7
+(1 row)
+
 -- timestamp
-SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00';  -- RFC 4122bis test vector
+SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 9562 test vector for v1
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT uuid_extract_timestamp('1EC9414C-232A-6B00-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 9562 test vector for v6
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT uuid_extract_timestamp('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 9562 test vector for v7
  ?column? 
 ----------
  t
diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql
index c88f6d087a..cfae3f8cd1 100644
--- a/src/test/regress/sql/uuid.sql
+++ b/src/test/regress/sql/uuid.sql
@@ -85,6 +85,18 @@ INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid());
 INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid());
 SELECT count(DISTINCT guid_field) FROM guid1;
 
+-- test of uuidv4() alias
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+SELECT count(DISTINCT guid_field) FROM guid1;
+
+-- generation test for v7
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+SELECT count(DISTINCT guid_field) FROM guid1;
+
 
 -- extract functions
 
@@ -92,9 +104,13 @@ SELECT count(DISTINCT guid_field) FROM guid1;
 SELECT uuid_extract_version('11111111-1111-5111-8111-111111111111');  -- 5
 SELECT uuid_extract_version(gen_random_uuid());  -- 4
 SELECT uuid_extract_version('11111111-1111-1111-1111-111111111111');  -- null
+SELECT uuid_extract_version(uuidv4()); --4
+SELECT uuid_extract_version(uuidv7()); --7
 
 -- timestamp
-SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00';  -- RFC 4122bis test vector
+SELECT uuid_extract_timestamp('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 9562 test vector for v1
+SELECT uuid_extract_timestamp('1EC9414C-232A-6B00-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 9562 test vector for v6
+SELECT uuid_extract_timestamp('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00'; -- RFC 9562 test vector for v7
 SELECT uuid_extract_timestamp(gen_random_uuid());  -- null
 SELECT uuid_extract_timestamp('11111111-1111-1111-1111-111111111111');  -- null
 
-- 
2.39.3 (Apple Git-146)

