From 148750ca11235bc24ef07ceb549b910ba2a862c2 Mon Sep 17 00:00:00 2001
From: "Andrey M. Borodin" <x4mmm@night.local>
Date: Sun, 20 Aug 2023 23:55:31 +0300
Subject: [PATCH v17] Implement UUID v7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds functions for UUID generation. The most important one
is uuidv7(), which generates a new UUID according to the new standard.
For code readability this commit also adds uuidv4() as an alias for gen_random_uuid().

Also we add a function to extract timestamp from UUID v1, v6 and v7.
To allow user to distinguish various UUID versions and variants
we add functions uuid_extract_ver() and uuid_extract_var().

Author: Andrey Borodin
Reviewers: Sergey Prokhorenko, Kirk Wolak, Przemysław Sztoch
Reviewers: Nikolay Samokhvalov, Jelte Fennema-Nio, Aleksander Alekseev
Reviewers: Peter Eisentraut, Chris Travers, Lukas Fittl
Discussion: https://postgr.es/m/CAAhFRxitJv%3DyoGnXUgeLB_O%2BM7J2BJAmb5jqAT9gZ3bij3uLDA%40mail.gmail.com
---
 doc/src/sgml/func.sgml                   |  61 +++++++-
 src/backend/utils/adt/uuid.c             | 178 +++++++++++++++++++++++
 src/include/catalog/pg_proc.dat          |  15 ++
 src/include/datatype/timestamp.h         |   3 +-
 src/test/regress/expected/opr_sanity.out |   5 +
 src/test/regress/expected/uuid.out       |  71 +++++++++
 src/test/regress/sql/uuid.sql            |  26 ++++
 7 files changed, 355 insertions(+), 4 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 6788ba8ef4a..97abf7f4c69 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -14128,13 +14128,68 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
    <primary>gen_random_uuid</primary>
   </indexterm>
 
+  <indexterm>
+   <primary>uuidv4</primary>
+  </indexterm>
+
+  <indexterm>
+   <primary>uuidv7</primary>
+  </indexterm>
+
+  <indexterm>
+   <primary>uuid_extract_time</primary>
+  </indexterm>
+
+  <indexterm>
+   <primary>uuid_extract_ver</primary>
+  </indexterm>
+
+  <indexterm>
+   <primary>uuid_extract_var</primary>
+  </indexterm>
+
   <para>
-   <productname>PostgreSQL</productname> includes one function to generate a UUID:
+   <productname>PostgreSQL</productname> includes several functions to generate a UUID:
+   <function>gen_random_uuid</function>, <function>uuidv4</function>, and <function>uuidv7</function>.
 <synopsis>
 <function>gen_random_uuid</function> () <returnvalue>uuid</returnvalue>
+<function>uuidv4</function> () <returnvalue>uuid</returnvalue>
+</synopsis>
+   These functions return a version 4 (random) UUID. UUIDv4 is one of the
+   most commonly used types of UUID. It is appropriate when random
+   distribution of keys does not affect performance of an application or
+   when exposing the generation time of a UUID has unacceptable security
+   or business intelligence implications.
+<synopsis>
+<function>uuidv7</function> () <returnvalue>uuid</returnvalue>
+</synopsis>
+   This function returns a version 7 UUID (UNIX timestamp with 1ms precision +
+   randomly seeded counter + random). It provides much better data locality
+   than UUIDv4, which can greatly improve performance when UUID is used in a
+   B-tree index (the default index type in PostgreSQL). To achieve this data
+   locality, UUIDv7 embeds its own generation time into the UUID. If exposing
+   such a timestamp has unacceptable security or business intelligence
+   implications, then uuidv4() should be used instead.
+<synopsis>
+<function>uuid_extract_time</function> (uuid) <returnvalue>timestamptz</returnvalue>
+</synopsis>
+   This function extracts a timestamptz from UUID versions 1, 6 and 7. For other
+   versions and variants this function returns NULL. The extracted timestamp
+   does not necessarily equate to the time of UUID generation. How close it is
+   to the actual time depends on the implementation that generated the UUID.
+   The uuidv7() function provided by PostgreSQL will normally store the actual time,
+   with some exceptions: prevention of time leaps backwards and counter overflow
+   being carried to time step.
+<synopsis>
+<function>uuid_extract_ver</function> (uuid) <returnvalue>int2</returnvalue>
+</synopsis>
+   This function extracts the version bits from a UUID of the variant described by
+   <ulink url="https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis">IETF standard</ulink>
+   (b10xx variant). For other variants this function returns NULL.
+<synopsis>
+<function>uuid_extract_var</function> (uuid) <returnvalue>int2</returnvalue>
 </synopsis>
-   This function returns a version 4 (random) UUID.  This is the most commonly
-   used type of UUID and is appropriate for most applications.
+   This function extracts the variant bits from a UUID.
   </para>
 
   <para>
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 73dfd711c73..a157f69c2b7 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -13,13 +13,18 @@
 
 #include "postgres.h"
 
+#include <sys/time.h>
+
+#include "access/xlog.h"
 #include "common/hashfn.h"
 #include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
 #include "port/pg_bswap.h"
 #include "utils/builtins.h"
+#include "utils/datetime.h"
 #include "utils/guc.h"
 #include "utils/sortsupport.h"
+#include "utils/timestamp.h"
 #include "utils/uuid.h"
 
 /* sortsupport for uuid */
@@ -421,3 +426,176 @@ gen_random_uuid(PG_FUNCTION_ARGS)
 
 	PG_RETURN_UUID_P(uuid);
 }
+
+static uint32_t sequence_counter;	/* 18-bit counter stored in the last UUID made by uuidv7() */
+static uint64_t previous_timestamp = 0;	/* millisecond timestamp stored in that UUID */
+
+/* Generate a version 7 UUID: 48-bit ms timestamp, 18-bit counter, random remainder. */
+Datum
+uuidv7(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = palloc(UUID_LEN);
+	uint64_t tms;
+	struct timeval tp;
+	bool increment_counter;
+
+	gettimeofday(&tp, NULL);
+	tms = ((uint64_t)tp.tv_sec) * 1000 + (tp.tv_usec) / 1000;
+	/* use the counter whenever the clock has not advanced or has leapt backward */
+	increment_counter = (tms <= previous_timestamp);
+
+	if (increment_counter)
+	{
+		/* Time did not advance since the previous generation, so increment the counter */
+		++sequence_counter;
+		if (sequence_counter > 0x3ffff)
+		{
+			/* the 18-bit counter overflowed: restart it and carry into the timestamp */
+			sequence_counter = 0;
+			previous_timestamp++;
+		}
+
+		/* reuse the stored timestamp so the time part never goes backward */
+		tms = previous_timestamp;
+
+		/* fill everything after the timestamp and counter with random bytes */
+		if (!pg_strong_random(&uuid->data[8], UUID_LEN - 8))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					errmsg("could not generate random values")));
+
+		/* most significant 4 bits of 18-bit counter */
+		uuid->data[6] = (unsigned char)(sequence_counter >> 14);
+		/* next 8 bits */
+		uuid->data[7] = (unsigned char)(sequence_counter >> 6);
+		/* least significant 6 bits; the top 2 bits are replaced by the variant below */
+		uuid->data[8] = (unsigned char)(sequence_counter);
+	}
+	else
+	{
+		/* fill everything after the timestamp with random bytes */
+		if (!pg_strong_random(&uuid->data[6], UUID_LEN - 6))
+			ereport(ERROR,
+					(errcode(ERRCODE_INTERNAL_ERROR),
+					errmsg("could not generate random values")));
+
+		/*
+		 * The left-most counter bit is initialized as zero for the sole purpose
+		 * of guarding against counter rollovers.
+		 * See section "Fixed-Length Dedicated Counter Seeding"
+		 * https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis-09#monotonicity_counters
+		 */
+		uuid->data[6] = (uuid->data[6] & 0xf7);
+
+		/* read randomly initialized bits of counter */
+		sequence_counter = ((uint32_t)uuid->data[8] & 0x3f) +
+							(((uint32_t)uuid->data[7]) << 6) +
+							(((uint32_t)uuid->data[6] & 0x0f) << 14);
+
+		previous_timestamp = tms;
+	}
+
+	/* Fill in time part */
+	uuid->data[0] = (unsigned char)(tms >> 40);
+	uuid->data[1] = (unsigned char)(tms >> 32);
+	uuid->data[2] = (unsigned char)(tms >> 24);
+	uuid->data[3] = (unsigned char)(tms >> 16);
+	uuid->data[4] = (unsigned char)(tms >> 8);
+	uuid->data[5] = (unsigned char)tms;
+
+	/*
+	 * Set magic numbers for a "version 7" (Unix timestamp based) UUID, see
+	 * https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis
+	 */
+	/* set version field, top four bits are 0, 1, 1, 1 */
+	uuid->data[6] = (uuid->data[6] & 0x0f) | 0x70;
+	/* set variant field, top two bits are 1, 0 */
+	uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80;
+
+	PG_RETURN_UUID_P(uuid);
+}
+
+/*
+ * Extract the embedded timestamp from a version 1, 6 or 7 UUID; NULL otherwise.
+ */
+Datum
+uuid_extract_time(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *uuid = PG_GETARG_UUID_P(0);
+	TimestampTz ts;
+	uint64_t tms;
+
+	if ((uuid->data[8] & 0xc0) != 0x80)	/* not the 0b10xx variant */
+		PG_RETURN_NULL();
+
+	if ((uuid->data[6] & 0xf0) == 0x70)
+	{
+		/* v7: octets 0-5 hold Unix time in milliseconds, most significant first */
+		tms = uuid->data[5];
+		tms += ((uint64_t)uuid->data[4]) << 8;
+		tms += ((uint64_t)uuid->data[3]) << 16;
+		tms += ((uint64_t)uuid->data[2]) << 24;
+		tms += ((uint64_t)uuid->data[1]) << 32;
+		tms += ((uint64_t)uuid->data[0]) << 40;
+
+		ts = (TimestampTz) (tms * 1000) - /* convert ms to us, then adjust */
+			(POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+
+		PG_RETURN_TIMESTAMPTZ(ts);
+	}
+
+	if ((uuid->data[6] & 0xf0) == 0x10)
+	{
+		/* v1: 60-bit timestamp split into time_low, time_mid and time_hi */
+		tms =  ((uint64_t)uuid->data[0]) << 24;
+		tms += ((uint64_t)uuid->data[1]) << 16;
+		tms += ((uint64_t)uuid->data[2]) << 8;
+		tms += ((uint64_t)uuid->data[3]);
+		tms += ((uint64_t)uuid->data[4]) << 40;
+		tms += ((uint64_t)uuid->data[5]) << 32;
+		tms += (((uint64_t)uuid->data[6]) & 0xf) << 56;
+		tms += ((uint64_t)uuid->data[7]) << 48;
+	}
+	else if ((uuid->data[6] & 0xf0) == 0x60)
+	{
+		/* v6: the same 60-bit timestamp, stored most significant bits first */
+		tms =  ((uint64_t)uuid->data[0]) << 52;
+		tms += ((uint64_t)uuid->data[1]) << 44;
+		tms += ((uint64_t)uuid->data[2]) << 36;
+		tms += ((uint64_t)uuid->data[3]) << 28;
+		tms += ((uint64_t)uuid->data[4]) << 20;
+		tms += ((uint64_t)uuid->data[5]) << 12;
+		tms += (((uint64_t)uuid->data[6]) & 0xf) << 8;
+		tms += ((uint64_t)uuid->data[7]);
+	}
+	else
+		PG_RETURN_NULL();
+
+	/* 100-ns intervals -> us, then shift epochs; offset kept signed for pre-2000 values */
+	ts = (TimestampTz) (tms / 10) -
+		((int64_t) POSTGRES_EPOCH_JDATE - GREGORIAN_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC;
+	PG_RETURN_TIMESTAMPTZ(ts);
+}
+
+Datum
+uuid_extract_ver(PG_FUNCTION_ARGS)
+{
+	pg_uuid_t  *arg = PG_GETARG_UUID_P(0);
+	unsigned char variant = arg->data[8] >> 6;
+
+	/* the version nibble is only reported for the 0b10xx variant */
+	if (variant != 0x02)
+		PG_RETURN_NULL();
+
+	PG_RETURN_UINT16((uint16_t) (arg->data[6] >> 4));	/* high nibble of octet 6 */
+}
+
+Datum
+uuid_extract_var(PG_FUNCTION_ARGS)
+{
+	/* The variant is reported as the two most significant bits of octet 8. */
+	pg_uuid_t  *arg = PG_GETARG_UUID_P(0);
+	uint16_t variant_bits = arg->data[8] >> 6;
+
+	PG_RETURN_UINT16(variant_bits);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 29af4ce65d5..f9be09464be 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9174,6 +9174,21 @@
 { oid => '3432', descr => 'generate random UUID',
   proname => 'gen_random_uuid', proleakproof => 't', provolatile => 'v',
   prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' },
+{ oid => '9895', descr => 'generate random UUID',
+  proname => 'uuidv4', proleakproof => 't', provolatile => 'v',
+  prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' },
+{ oid => '9896', descr => 'generate UUID version 7',
+  proname => 'uuidv7', proleakproof => 't', provolatile => 'v',
+  prorettype => 'uuid', proargtypes => '', prosrc => 'uuidv7' },
+{ oid => '9897', descr => 'extract timestamp from UUID',
+  proname => 'uuid_extract_time', proleakproof => 't',
+  prorettype => 'timestamptz', proargtypes => 'uuid', prosrc => 'uuid_extract_time' },
+{ oid => '9898', descr => 'extract version from RFC 4122 UUID',
+  proname => 'uuid_extract_ver', proleakproof => 't',
+  prorettype => 'int2', proargtypes => 'uuid', prosrc => 'uuid_extract_ver' },
+{ oid => '9899', descr => 'extract variant from UUID',
+  proname => 'uuid_extract_var', proleakproof => 't',
+  prorettype => 'int2', proargtypes => 'uuid', prosrc => 'uuid_extract_var' },
 
 # pg_lsn
 { oid => '3229', descr => 'I/O',
diff --git a/src/include/datatype/timestamp.h b/src/include/datatype/timestamp.h
index 3a37cb661e3..652aeb428e2 100644
--- a/src/include/datatype/timestamp.h
+++ b/src/include/datatype/timestamp.h
@@ -230,9 +230,10 @@ struct pg_itm_in
 	 ((y) < JULIAN_MAXYEAR || \
 	  ((y) == JULIAN_MAXYEAR && ((m) < JULIAN_MAXMONTH))))
 
-/* Julian-date equivalents of Day 0 in Unix and Postgres reckoning */
+/* Julian-date equivalents of Day 0 in Unix, Postgres and Gregorian epochs */
 #define UNIX_EPOCH_JDATE		2440588 /* == date2j(1970, 1, 1) */
 #define POSTGRES_EPOCH_JDATE	2451545 /* == date2j(2000, 1, 1) */
+#define GREGORIAN_EPOCH_JDATE	2299161 /* == date2j(1582,10,15) */
 
 /*
  * Range limits for dates and timestamps.
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 7610b011d68..f4b9ff654ab 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -872,6 +872,11 @@ xid8ge(xid8,xid8)
 xid8eq(xid8,xid8)
 xid8ne(xid8,xid8)
 xid8cmp(xid8,xid8)
+uuidv4()
+uuidv7()
+uuid_extract_time(uuid)
+uuid_extract_ver(uuid)
+uuid_extract_var(uuid)
 -- restore normal output mode
 \a\t
 -- List of functions used by libpq's fe-lobj.c
diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out
index 8e7f21910d6..f401a550885 100644
--- a/src/test/regress/expected/uuid.out
+++ b/src/test/regress/expected/uuid.out
@@ -168,5 +168,76 @@ SELECT count(DISTINCT guid_field) FROM guid1;
      2
 (1 row)
 
+-- test of uuidv4() alias
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+SELECT count(DISTINCT guid_field) FROM guid1;
+ count 
+-------
+     2
+(1 row)
+
+-- generation test for v7
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+SELECT count(DISTINCT guid_field) FROM guid1;
+ count 
+-------
+     2
+(1 row)
+
+-- support functions for UUID versions and variants
+SELECT uuid_extract_ver(uuidv7());
+ uuid_extract_ver 
+------------------
+                7
+(1 row)
+
+SELECT uuid_extract_ver('{11111111-1111-1111-1111-111111111111}') IS NULL;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT uuid_extract_ver('{11111111-1111-5111-8111-111111111111}');
+ uuid_extract_ver 
+------------------
+                5
+(1 row)
+
+SELECT uuid_extract_var(uuidv7());
+ uuid_extract_var 
+------------------
+                2
+(1 row)
+
+-- uuid_extract_time() must refuse to accept non-UUIDv7
+SELECT uuid_extract_time(gen_random_uuid());
+ uuid_extract_time 
+-------------------
+ 
+(1 row)
+
+-- extract UUID v1, v6 and v7 timestamp
+SELECT uuid_extract_time('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT uuid_extract_time('1EC9414C-232A-6B00-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT uuid_extract_time('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00';
+ ?column? 
+----------
+ t
+(1 row)
+
 -- clean up
 DROP TABLE guid1, guid2 CASCADE;
diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql
index 9a8f437c7d2..c7362cf4e13 100644
--- a/src/test/regress/sql/uuid.sql
+++ b/src/test/regress/sql/uuid.sql
@@ -85,5 +85,31 @@ INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid());
 INSERT INTO guid1 (guid_field) VALUES (gen_random_uuid());
 SELECT count(DISTINCT guid_field) FROM guid1;
 
+-- test of uuidv4() alias
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+INSERT INTO guid1 (guid_field) VALUES (uuidv4());
+SELECT count(DISTINCT guid_field) FROM guid1;
+
+-- generation test for v7
+TRUNCATE guid1;
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+INSERT INTO guid1 (guid_field) VALUES (uuidv7());
+SELECT count(DISTINCT guid_field) FROM guid1;
+
+-- support functions for UUID versions and variants
+SELECT uuid_extract_ver(uuidv7());
+SELECT uuid_extract_ver('{11111111-1111-1111-1111-111111111111}') IS NULL;
+SELECT uuid_extract_ver('{11111111-1111-5111-8111-111111111111}');
+SELECT uuid_extract_var(uuidv7());
+
+-- uuid_extract_time() must refuse to accept non-UUIDv7
+SELECT uuid_extract_time(gen_random_uuid());
+
+-- extract UUID v1, v6 and v7 timestamp
+SELECT uuid_extract_time('C232AB00-9414-11EC-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00';
+SELECT uuid_extract_time('1EC9414C-232A-6B00-B3C8-9F6BDECED846') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00';
+SELECT uuid_extract_time('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday, February 22, 2022 2:22:22.00 PM GMT+05:00';
+
 -- clean up
 DROP TABLE guid1, guid2 CASCADE;
-- 
2.42.0

