https://github.com/python/cpython/commit/3929af5e3a203291dc07b40c9c3515492e3ba7b4
commit: 3929af5e3a203291dc07b40c9c3515492e3ba7b4
branch: main
author: Bénédikt Tran <10796600+picn...@users.noreply.github.com>
committer: picnixz <10796600+picn...@users.noreply.github.com>
date: 2025-03-04T10:47:19+01:00
summary:

gh-89083: add support for UUID version 7 (RFC 9562) (#121119)

Add support for generating UUIDv7 objects according to RFC 9562, §5.7 [1].

The functionality is provided by the `uuid.uuid7()` function. The implementation
is based on a 42-bit counter as described by Method 1, §6.2 [2] and guarantees
monotonicity within the same millisecond.

[1]: https://www.rfc-editor.org/rfc/rfc9562.html#section-5.7
[2]: https://www.rfc-editor.org/rfc/rfc9562.html#section-6.2

---------

Co-authored-by: Hugo van Kemenade <1324225+hug...@users.noreply.github.com>
Co-authored-by: Victor Stinner <vstin...@python.org>
Co-authored-by: Éric <mer...@netwok.org>

files:
A Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst
M Doc/library/uuid.rst
M Doc/whatsnew/3.14.rst
M Lib/test/test_uuid.py
M Lib/uuid.py

diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst
index 88e5fae70b76d9..0fb29460e2e68a 100644
--- a/Doc/library/uuid.rst
+++ b/Doc/library/uuid.rst
@@ -11,9 +11,10 @@
 --------------
 
 This module provides immutable :class:`UUID` objects (the :class:`UUID` class)
-and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5`,
-:func:`uuid6`, and :func:`uuid8` for generating version 1, 3, 4, 5, 6,
-and 8 UUIDs as specified in :rfc:`9562` (which supersedes :rfc:`4122`).
+and :ref:`functions <uuid-factory-functions>` for generating UUIDs 
corresponding
+to a specific UUID version as specified in :rfc:`9562` (which supersedes 
:rfc:`4122`),
+for example, :func:`uuid1` for UUID version 1, :func:`uuid3` for UUID version 
3, and so on.
+Note that UUID version 2 is deliberately omitted as it is outside the scope of 
the RFC.
 
 If all you want is a unique ID, you should probably call :func:`uuid1` or
 :func:`uuid4`.  Note that :func:`uuid1` may compromise privacy since it creates
@@ -154,7 +155,7 @@ which relays any information about the UUID's safety, using 
this enumeration:
    :const:`RFC_4122`).
 
    .. versionchanged:: next
-      Added UUID versions 6 and 8.
+      Added UUID versions 6, 7 and 8.
 
 
 .. attribute:: UUID.is_safe
@@ -185,6 +186,8 @@ The :mod:`uuid` module defines the following functions:
       globally unique, while the latter are not.
 
 
+.. _uuid-factory-functions:
+
 .. function:: uuid1(node=None, clock_seq=None)
 
    Generate a UUID from a host ID, sequence number, and the current time. If 
*node*
@@ -228,6 +231,18 @@ The :mod:`uuid` module defines the following functions:
    .. versionadded:: next
 
 
+.. function:: uuid7()
+
+   Generate a time-based UUID according to
+   :rfc:`RFC 9562, §5.7 <9562#section-5.7>`.
+
+   For portability across platforms lacking sub-millisecond precision, UUIDs
+   produced by this function embed a 48-bit timestamp and use a 42-bit counter
+   to guarantee monotonicity within a millisecond.
+
+   .. versionadded:: next
+
+
 .. function:: uuid8(a=None, b=None, c=None)
 
    Generate a pseudo-random UUID according to
@@ -330,7 +345,7 @@ The :mod:`uuid` module can be executed as a script from the 
command line.
 
 .. code-block:: sh
 
-   python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid6,uuid8}] [-n 
NAMESPACE] [-N NAME]
+   python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid6,uuid7,uuid8}] [-n 
NAMESPACE] [-N NAME]
 
 The following options are accepted:
 
@@ -347,7 +362,7 @@ The following options are accepted:
    is used.
 
    .. versionchanged:: next
-      Allow generating UUID versions 6 and 8.
+      Allow generating UUID versions 6, 7 and 8.
 
 .. option:: -n <namespace>
             --namespace <namespace>
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 2875913e4abdf1..0721ea5cb8d5d1 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -924,8 +924,9 @@ urllib
 uuid
 ----
 
-* Add support for UUID versions 6 and 8 via :func:`uuid.uuid6` and
-  :func:`uuid.uuid8` respectively, as specified in :rfc:`9562`.
+* Add support for UUID versions 6, 7, and 8 via :func:`uuid.uuid6`,
+  :func:`uuid.uuid7`, and :func:`uuid.uuid8` respectively, as specified
+  in :rfc:`9562`.
   (Contributed by Bénédikt Tran in :gh:`89083`.)
 
 * :const:`uuid.NIL` and :const:`uuid.MAX` are now available to represent the
diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py
index e284de93fbdfd1..bcc673233f6588 100755
--- a/Lib/test/test_uuid.py
+++ b/Lib/test/test_uuid.py
@@ -871,6 +871,206 @@ def test_uuid6_test_vectors(self):
             equal((u.int >> 80) & 0xffff, 0x232a)
             equal((u.int >> 96) & 0xffff_ffff, 0x1ec9_414c)
 
+    def test_uuid7(self):
+        equal = self.assertEqual
+        u = self.uuid.uuid7()
+        equal(u.variant, self.uuid.RFC_4122)
+        equal(u.version, 7)
+
+        # 1 Jan 2023 12:34:56.123_456_789
+        timestamp_ns = 1672533296_123_456_789  # ns precision
+        timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
+
+        for _ in range(100):
+            counter_hi = random.getrandbits(11)
+            counter_lo = random.getrandbits(30)
+            counter = (counter_hi << 30) | counter_lo
+
+            tail = random.getrandbits(32)
+            # effective number of bits is 32 + 30 + 11 = 73
+            random_bits = counter << 32 | tail
+
+            # set all remaining MSB of fake random bits to 1 to ensure that
+            # the implementation correctly removes them
+            random_bits = (((1 << 7) - 1) << 73) | random_bits
+            random_data = random_bits.to_bytes(10)
+
+            with (
+                mock.patch.multiple(
+                    self.uuid,
+                    _last_timestamp_v7=None,
+                    _last_counter_v7=0,
+                ),
+                mock.patch('time.time_ns', return_value=timestamp_ns),
+                mock.patch('os.urandom', return_value=random_data) as urand
+            ):
+                u = self.uuid.uuid7()
+                urand.assert_called_once_with(10)
+                equal(u.variant, self.uuid.RFC_4122)
+                equal(u.version, 7)
+
+                equal(self.uuid._last_timestamp_v7, timestamp_ms)
+                equal(self.uuid._last_counter_v7, counter)
+
+                unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff
+                equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms)
+
+                equal((u.int >> 75) & 1, 0)  # check that the MSB is 0
+                equal((u.int >> 64) & 0xfff, counter_hi)
+                equal((u.int >> 32) & 0x3fff_ffff, counter_lo)
+                equal(u.int & 0xffff_ffff, tail)
+
+    def test_uuid7_uniqueness(self):
+        # Test that UUIDv7-generated values are unique.
+        #
+        # While UUIDv8 has an entropy of 122 bits, those 122 bits may not
+        # necessarily be sampled from a PRNG. On the other hand, UUIDv7
+        # uses os.urandom() as a PRNG which features better randomness.
+        N = 1000
+        uuids = {self.uuid.uuid7() for _ in range(N)}
+        self.assertEqual(len(uuids), N)
+
+        versions = {u.version for u in uuids}
+        self.assertSetEqual(versions, {7})
+
+    def test_uuid7_monotonicity(self):
+        equal = self.assertEqual
+
+        us = [self.uuid.uuid7() for _ in range(10_000)]
+        equal(us, sorted(us))
+
+        with mock.patch.multiple(
+            self.uuid,
+            _last_timestamp_v7=0,
+            _last_counter_v7=0,
+        ):
+            # 1 Jan 2023 12:34:56.123_456_789
+            timestamp_ns = 1672533296_123_456_789  # ns precision
+            timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
+
+            # counter_{hi,lo} are chosen so that "counter + 1" does not 
overflow
+            counter_hi = random.getrandbits(11)
+            counter_lo = random.getrandbits(29)
+            counter = (counter_hi << 30) | counter_lo
+            self.assertLess(counter + 1, 0x3ff_ffff_ffff)
+
+            tail = random.getrandbits(32)
+            random_bits = counter << 32 | tail
+            random_data = random_bits.to_bytes(10)
+
+            with (
+                mock.patch('time.time_ns', return_value=timestamp_ns),
+                mock.patch('os.urandom', return_value=random_data) as urand
+            ):
+                u1 = self.uuid.uuid7()
+                urand.assert_called_once_with(10)
+                equal(self.uuid._last_timestamp_v7, timestamp_ms)
+                equal(self.uuid._last_counter_v7, counter)
+                equal((u1.int >> 64) & 0xfff, counter_hi)
+                equal((u1.int >> 32) & 0x3fff_ffff, counter_lo)
+                equal(u1.int & 0xffff_ffff, tail)
+
+            # 1 Jan 2023 12:34:56.123_457_032 (same millisecond but not same 
ns)
+            next_timestamp_ns = 1672533296_123_457_032
+            next_timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
+            equal(timestamp_ms, next_timestamp_ms)
+
+            next_tail_bytes = os.urandom(4)
+            next_fail = int.from_bytes(next_tail_bytes)
+
+            with (
+                mock.patch('time.time_ns', return_value=next_timestamp_ns),
+                mock.patch('os.urandom', return_value=next_tail_bytes) as urand
+            ):
+                u2 = self.uuid.uuid7()
+                urand.assert_called_once_with(4)
+                # same milli-second
+                equal(self.uuid._last_timestamp_v7, timestamp_ms)
+                # 42-bit counter advanced by 1
+                equal(self.uuid._last_counter_v7, counter + 1)
+                equal((u2.int >> 64) & 0xfff, counter_hi)
+                equal((u2.int >> 32) & 0x3fff_ffff, counter_lo + 1)
+                equal(u2.int & 0xffff_ffff, next_fail)
+
+            self.assertLess(u1, u2)
+
+    def test_uuid7_timestamp_backwards(self):
+        equal = self.assertEqual
+        # 1 Jan 2023 12:34:56.123_456_789
+        timestamp_ns = 1672533296_123_456_789  # ns precision
+        timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
+        fake_last_timestamp_v7 = timestamp_ms + 1
+
+        # counter_{hi,lo} are chosen so that "counter + 1" does not overflow
+        counter_hi = random.getrandbits(11)
+        counter_lo = random.getrandbits(29)
+        counter = (counter_hi << 30) | counter_lo
+        self.assertLess(counter + 1, 0x3ff_ffff_ffff)
+
+        tail_bytes = os.urandom(4)
+        tail = int.from_bytes(tail_bytes)
+
+        with (
+            mock.patch.multiple(
+                self.uuid,
+                _last_timestamp_v7=fake_last_timestamp_v7,
+                _last_counter_v7=counter,
+            ),
+            mock.patch('time.time_ns', return_value=timestamp_ns),
+            mock.patch('os.urandom', return_value=tail_bytes) as urand
+        ):
+            u = self.uuid.uuid7()
+            urand.assert_called_once_with(4)
+            equal(u.variant, self.uuid.RFC_4122)
+            equal(u.version, 7)
+            equal(self.uuid._last_timestamp_v7, fake_last_timestamp_v7 + 1)
+            unix_ts_ms = (fake_last_timestamp_v7 + 1) & 0xffff_ffff_ffff
+            equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms)
+            # 42-bit counter advanced by 1
+            equal(self.uuid._last_counter_v7, counter + 1)
+            equal((u.int >> 64) & 0xfff, counter_hi)
+            # 42-bit counter advanced by 1 (counter_hi is untouched)
+            equal((u.int >> 32) & 0x3fff_ffff, counter_lo + 1)
+            equal(u.int & 0xffff_ffff, tail)
+
+    def test_uuid7_overflow_counter(self):
+        equal = self.assertEqual
+        # 1 Jan 2023 12:34:56.123_456_789
+        timestamp_ns = 1672533296_123_456_789  # ns precision
+        timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
+
+        new_counter_hi = random.getrandbits(11)
+        new_counter_lo = random.getrandbits(30)
+        new_counter = (new_counter_hi << 30) | new_counter_lo
+
+        tail = random.getrandbits(32)
+        random_bits = (new_counter << 32) | tail
+        random_data = random_bits.to_bytes(10)
+
+        with (
+            mock.patch.multiple(
+                self.uuid,
+                _last_timestamp_v7=timestamp_ms,
+                # same timestamp, but force an overflow on the counter
+                _last_counter_v7=0x3ff_ffff_ffff,
+            ),
+            mock.patch('time.time_ns', return_value=timestamp_ns),
+            mock.patch('os.urandom', return_value=random_data) as urand
+        ):
+            u = self.uuid.uuid7()
+            urand.assert_called_with(10)
+            equal(u.variant, self.uuid.RFC_4122)
+            equal(u.version, 7)
+            # timestamp advanced due to overflow
+            equal(self.uuid._last_timestamp_v7, timestamp_ms + 1)
+            unix_ts_ms = (timestamp_ms + 1) & 0xffff_ffff_ffff
+            equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms)
+            # counter overflowed, so we picked a new one
+            equal(self.uuid._last_counter_v7, new_counter)
+            equal((u.int >> 64) & 0xfff, new_counter_hi)
+            equal((u.int >> 32) & 0x3fff_ffff, new_counter_lo)
+            equal(u.int & 0xffff_ffff, tail)
+
     def test_uuid8(self):
         equal = self.assertEqual
         u = self.uuid.uuid8()
diff --git a/Lib/uuid.py b/Lib/uuid.py
index ed69b4de07b53f..7a12b48cb008f1 100644
--- a/Lib/uuid.py
+++ b/Lib/uuid.py
@@ -1,8 +1,12 @@
 r"""UUID objects (universally unique identifiers) according to RFC 4122/9562.
 
-This module provides immutable UUID objects (class UUID) and the functions
-uuid1(), uuid3(), uuid4(), uuid5(), uuid6(), and uuid8() for generating
-version 1, 3, 4, 5, 6, and 8 UUIDs as specified in RFC 4122/9562.
+This module provides immutable UUID objects (class UUID) and functions for
+generating UUIDs corresponding to a specific UUID version as specified in
+RFC 4122/9562, e.g., uuid1() for UUID version 1, uuid3() for UUID version 3,
+and so on.
+
+Note that UUID version 2 is deliberately omitted as it is outside the scope
+of the RFC.
 
 If all you want is a unique ID, you should probably call uuid1() or uuid4().
 Note that uuid1() may compromise privacy since it creates a UUID containing
@@ -54,6 +58,7 @@
 
 import os
 import sys
+import time
 
 from enum import Enum, _simple_enum
 
@@ -102,6 +107,7 @@ class SafeUUID:
 _RFC_4122_VERSION_4_FLAGS = ((4 << 76) | (0x8000 << 48))
 _RFC_4122_VERSION_5_FLAGS = ((5 << 76) | (0x8000 << 48))
 _RFC_4122_VERSION_6_FLAGS = ((6 << 76) | (0x8000 << 48))
+_RFC_4122_VERSION_7_FLAGS = ((7 << 76) | (0x8000 << 48))
 _RFC_4122_VERSION_8_FLAGS = ((8 << 76) | (0x8000 << 48))
 
 
@@ -720,7 +726,6 @@ def uuid1(node=None, clock_seq=None):
         return UUID(bytes=uuid_time, is_safe=is_safe)
 
     global _last_timestamp
-    import time
     nanoseconds = time.time_ns()
     # 0x01b21dd213814000 is the number of 100-ns intervals between the
     # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
@@ -770,6 +775,7 @@ def uuid5(namespace, name):
     int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS
     return UUID._from_int(int_uuid_5)
 
+
 _last_timestamp_v6 = None
 
 def uuid6(node=None, clock_seq=None):
@@ -808,6 +814,83 @@ def uuid6(node=None, clock_seq=None):
     int_uuid_6 |= _RFC_4122_VERSION_6_FLAGS
     return UUID._from_int(int_uuid_6)
 
+
+_last_timestamp_v7 = None
+_last_counter_v7 = 0  # 42-bit counter
+
+def _uuid7_get_counter_and_tail():
+    rand = int.from_bytes(os.urandom(10))
+    # 42-bit counter with MSB set to 0
+    counter = (rand >> 32) & 0x1ff_ffff_ffff
+    # 32-bit random data
+    tail = rand & 0xffff_ffff
+    return counter, tail
+
+
+def uuid7():
+    """Generate a UUID from a Unix timestamp in milliseconds and random bits.
+
+    UUIDv7 objects feature monotonicity within a millisecond.
+    """
+    # --- 48 ---   -- 4 --   --- 12 ---   -- 2 --   --- 30 ---   - 32 -
+    # unix_ts_ms | version | counter_hi | variant | counter_lo | random
+    #
+    # 'counter = counter_hi | counter_lo' is a 42-bit counter constructed
+    # with Method 1 of RFC 9562, §6.2, and its MSB is set to 0.
+    #
+    # 'random' is a 32-bit random value regenerated for every new UUID.
+    #
+    # If multiple UUIDs are generated within the same millisecond, the LSB
+    # of 'counter' is incremented by 1. When overflowing, the timestamp is
+    # advanced and the counter is reset to a random 42-bit integer with MSB
+    # set to 0.
+
+    global _last_timestamp_v7
+    global _last_counter_v7
+
+    nanoseconds = time.time_ns()
+    timestamp_ms = nanoseconds // 1_000_000
+
+    if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7:
+        counter, tail = _uuid7_get_counter_and_tail()
+    else:
+        if timestamp_ms < _last_timestamp_v7:
+            timestamp_ms = _last_timestamp_v7 + 1
+        # advance the 42-bit counter
+        counter = _last_counter_v7 + 1
+        if counter > 0x3ff_ffff_ffff:
+            # advance the 48-bit timestamp
+            timestamp_ms += 1
+            counter, tail = _uuid7_get_counter_and_tail()
+        else:
+            # 32-bit random data
+            tail = int.from_bytes(os.urandom(4))
+
+    unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff
+    counter_msbs = counter >> 30
+    # keep 12 counter's MSBs and clear variant bits
+    counter_hi = counter_msbs & 0x0fff
+    # keep 30 counter's LSBs and clear version bits
+    counter_lo = counter & 0x3fff_ffff
+    # ensure that the tail is always a 32-bit integer (by construction,
+    # it is already the case, but future interfaces may allow the user
+    # to specify the random tail)
+    tail &= 0xffff_ffff
+
+    int_uuid_7 = unix_ts_ms << 80
+    int_uuid_7 |= counter_hi << 64
+    int_uuid_7 |= counter_lo << 32
+    int_uuid_7 |= tail
+    # by construction, the variant and version bits are already cleared
+    int_uuid_7 |= _RFC_4122_VERSION_7_FLAGS
+    res = UUID._from_int(int_uuid_7)
+
+    # defer global update until all computations are done
+    _last_timestamp_v7 = timestamp_ms
+    _last_counter_v7 = counter
+    return res
+
+
 def uuid8(a=None, b=None, c=None):
     """Generate a UUID from three custom blocks.
 
@@ -833,6 +916,7 @@ def uuid8(a=None, b=None, c=None):
     int_uuid_8 |= _RFC_4122_VERSION_8_FLAGS
     return UUID._from_int(int_uuid_8)
 
+
 def main():
     """Run the uuid command line interface."""
     uuid_funcs = {
@@ -841,6 +925,7 @@ def main():
         "uuid4": uuid4,
         "uuid5": uuid5,
         "uuid6": uuid6,
+        "uuid7": uuid7,
         "uuid8": uuid8,
     }
     uuid_namespace_funcs = ("uuid3", "uuid5")
diff --git 
a/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst 
b/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst
new file mode 100644
index 00000000000000..f85e05622623c2
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst
@@ -0,0 +1,2 @@
+Add :func:`uuid.uuid7` for generating UUIDv7 objects as specified in
+:rfc:`9562`. Patch by Bénédikt Tran.

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to