From cd78c0872ec9791e100e4569a980f5988ec5a13d Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Fri, 31 Jan 2025 12:03:16 +0500
Subject: [PATCH v3] UUIDv7: fix offset computations for dates after 2262

We used a nanosecond representation of offset time values, which
cannot be stored in a 64-bit integer for dates far beyond the
beginning of the UNIX epoch (a signed 64-bit nanosecond count
overflows in 2262). To prevent the overflow we separate the millisecond
part from the sub-millisecond nanoseconds, thus allowing us to store
both parts in 64-bit integers.
---
 doc/src/sgml/func.sgml             |  6 ++---
 src/backend/utils/adt/uuid.c       | 37 ++++++++++++++++--------------
 src/include/catalog/pg_proc.dat    |  2 +-
 src/test/regress/expected/uuid.out | 15 +++++++++++-
 src/test/regress/sql/uuid.sql      | 12 +++++++++-
 5 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 7efc81936a..8cf3e374b8 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -14330,12 +14330,12 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
 </synopsis>
    These functions return a version 4 (random) UUID.
 <synopsis>
-<function>uuidv7</function> (<optional> <parameter>shift</parameter> <type>interval</type> </optional>) <returnvalue>uuid</returnvalue>
+<function>uuidv7</function> (<optional> <parameter>offset</parameter> <type>interval</type> </optional>) <returnvalue>uuid</returnvalue>
 </synopsis>
     This function returns a version 7 UUID (UNIX timestamp with millisecond
     precision + sub-millisecond timestamp + random). This function can accept
-    optional <parameter>shift</parameter> parameter of type <type>interval</type>
-    which shift internal timestamp by the given interval.
+    an optional <parameter>offset</parameter> parameter of type <type>interval</type>,
+    which offsets the internal timestamp by the given interval.
   </para>
 
   <para>
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 4f8402ef92..f368081cc4 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -29,6 +29,7 @@
 #define NS_PER_S	INT64CONST(1000000000)
 #define NS_PER_MS	INT64CONST(1000000)
 #define NS_PER_US	INT64CONST(1000)
+#define US_PER_MS	INT64CONST(1000)
 
 /*
  * UUID version 7 uses 12 bits in "rand_a" to store  1/4096 (or 2^12) fractions of
@@ -69,6 +70,7 @@ static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
 static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
 static inline void uuid_set_version(pg_uuid_t *uuid, unsigned char version);
 static inline int64 get_real_time_ns_ascending();
+static pg_uuid_t *generate_uuidv7(uint64 unix_ts_ms, uint32 sub_ms);
 
 Datum
 uuid_in(PG_FUNCTION_ARGS)
@@ -523,18 +525,18 @@ get_real_time_ns_ascending()
  * described in the RFC. This method utilizes 12 bits from the "rand_a" bits
  * to store a 1/4096 (or 2^12) fraction of sub-millisecond precision.
  *
- * ns is a number of nanoseconds since start of the UNIX epoch. This value is
+ * unix_ts_ms is the number of milliseconds since the start of the UNIX epoch,
+ * sub_ms is the number of nanoseconds within that millisecond. These are
  * used for time-dependent bits of UUID.
+ *
+ * NB: all numbers here are unsigned; unix_ts_ms cannot be negative per the RFC.
  */
 static pg_uuid_t *
-generate_uuidv7(int64 ns)
+generate_uuidv7(uint64 unix_ts_ms, uint32 sub_ms)
 {
 	pg_uuid_t  *uuid = palloc(UUID_LEN);
-	int64		unix_ts_ms;
 	int32		increased_clock_precision;
 
-	unix_ts_ms = ns / NS_PER_MS;
-
 	/* Fill in time part */
 	uuid->data[0] = (unsigned char) (unix_ts_ms >> 40);
 	uuid->data[1] = (unsigned char) (unix_ts_ms >> 32);
@@ -547,7 +549,7 @@ generate_uuidv7(int64 ns)
 	 * sub-millisecond timestamp fraction (SUBMS_BITS bits, not
 	 * SUBMS_MINIMAL_STEP_BITS)
 	 */
-	increased_clock_precision = ((ns % NS_PER_MS) * (1 << SUBMS_BITS)) / NS_PER_MS;
+	increased_clock_precision = ((sub_ms) * (1 << SUBMS_BITS)) / NS_PER_MS;
 
 	/* Fill the increased clock precision to "rand_a" bits */
 	uuid->data[6] = (unsigned char) (increased_clock_precision >> 8);
@@ -586,7 +588,8 @@ generate_uuidv7(int64 ns)
 Datum
 uuidv7(PG_FUNCTION_ARGS)
 {
-	pg_uuid_t  *uuid = generate_uuidv7(get_real_time_ns_ascending());
+	int64		ns = get_real_time_ns_ascending();
+	pg_uuid_t  *uuid = generate_uuidv7(ns / NS_PER_MS, ns % NS_PER_MS);
 
 	PG_RETURN_UUID_P(uuid);
 }
@@ -600,14 +603,17 @@ uuidv7_interval(PG_FUNCTION_ARGS)
 	Interval   *shift = PG_GETARG_INTERVAL_P(0);
 	TimestampTz ts;
 	pg_uuid_t  *uuid;
+	/*
+	 * 64 bits is enough to hold ns until year 2262, but not for the whole
+	 * time range of UUIDv7 (up to year 10888).
+	 */
 	int64		ns = get_real_time_ns_ascending();
+	int64		us;
 
 	/*
 	 * Shift the current timestamp by the given interval. To calculate time
 	 * shift correctly, we convert the UNIX epoch to TimestampTz and use
-	 * timestamptz_pl_interval(). Since this calculation is done with
-	 * microsecond precision, we carry nanoseconds from original ns value to
-	 * shifted ns value.
+	 * timestamptz_pl_interval().
 	 */
 
 	ts = (TimestampTz) (ns / NS_PER_US) -
@@ -618,14 +624,11 @@ uuidv7_interval(PG_FUNCTION_ARGS)
 												 TimestampTzGetDatum(ts),
 												 IntervalPGetDatum(shift)));
 
-	/*
-	 * Convert a TimestampTz value back to an UNIX epoch and back nanoseconds.
-	 */
-	ns = (ts + (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC)
-		* NS_PER_US + ns % NS_PER_US;
+	/* Convert the TimestampTz value back to microseconds since the UNIX epoch */
+	us = (ts + (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY * USECS_PER_SEC);
 
-	/* Generate an UUIDv7 */
-	uuid = generate_uuidv7(ns);
+	/* Generate a UUIDv7, carrying over the sub-microsecond ns remainder */
+	uuid = generate_uuidv7(us / US_PER_MS, (us % US_PER_MS) * NS_PER_US + ns % NS_PER_US);
 
 	PG_RETURN_UUID_P(uuid);
 }
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 5b8c2ad2a5..c28cace01a 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9392,7 +9392,7 @@
   proname => 'uuidv7', provolatile => 'v',
   prorettype => 'uuid', proargtypes => '', prosrc => 'uuidv7' },
 { oid => '9897', descr => 'generate UUID version 7 with a timestamp shifted by specified interval',
-  proname => 'uuidv7', provolatile => 'v', proargnames => '{shift}',
+  proname => 'uuidv7', provolatile => 'v', proargnames => '{offset}',
   prorettype => 'uuid', proargtypes => 'interval', prosrc => 'uuidv7_interval' },
 { oid => '6342', descr => 'extract timestamp from UUID',
   proname => 'uuid_extract_timestamp', proleakproof => 't',
diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out
index 798633ad51..96e93fbb28 100644
--- a/src/test/regress/expected/uuid.out
+++ b/src/test/regress/expected/uuid.out
@@ -290,5 +290,18 @@ SELECT uuid_extract_timestamp('11111111-1111-1111-1111-111111111111');  -- null
  
 (1 row)
 
+-- offset generation
+CREATE TABLE guid4(c SERIAL, d uuid, t timestamptz generated always as
+(uuid_extract_timestamp(d)) stored);
+-- generate UUIDs up to year 10000
+INSERT INTO guid4 (d) SELECT uuidv7((n || 'years')::interval) FROM generate_series(1, 8000) n; -- should work fine until year 2888 = 10888 (end of UUIDv7) - 8000
+SELECT count(*) FROM 
+	(SELECT t - lag(t) OVER (ORDER BY c) AS diff FROM guid4)
+WHERE diff > '367 days'; -- If UUIDs would be generated instantly and without overlap we would have up to '366 days'. One day is extra tolerance in case of machine stalls
+ count 
+-------
+     0
+(1 row)
+
 -- clean up
-DROP TABLE guid1, guid2, guid3 CASCADE;
+DROP TABLE guid1, guid2, guid3, guid4 CASCADE;
diff --git a/src/test/regress/sql/uuid.sql b/src/test/regress/sql/uuid.sql
index 110188361d..9b0bec4912 100644
--- a/src/test/regress/sql/uuid.sql
+++ b/src/test/regress/sql/uuid.sql
@@ -134,6 +134,16 @@ SELECT uuid_extract_timestamp('017F22E2-79B0-7CC3-98C4-DC0C0C07398F') = 'Tuesday
 SELECT uuid_extract_timestamp(gen_random_uuid());  -- null
 SELECT uuid_extract_timestamp('11111111-1111-1111-1111-111111111111');  -- null
 
+-- offset generation
+CREATE TABLE guid4(c SERIAL, d uuid, t timestamptz generated always as
+(uuid_extract_timestamp(d)) stored);
+
+-- generate UUIDs up to year 10000
+INSERT INTO guid4 (d) SELECT uuidv7((n || 'years')::interval) FROM generate_series(1, 8000) n; -- should work fine until year 2888 = 10888 (end of UUIDv7) - 8000
+
+SELECT count(*) FROM 
+	(SELECT t - lag(t) OVER (ORDER BY c) AS diff FROM guid4)
+WHERE diff > '367 days'; -- If UUIDs would be generated instantly and without overlap we would have up to '366 days'. One day is extra tolerance in case of machine stalls
 
 -- clean up
-DROP TABLE guid1, guid2, guid3 CASCADE;
+DROP TABLE guid1, guid2, guid3, guid4 CASCADE;
-- 
2.39.5 (Apple Git-154)

