https://github.com/python/cpython/commit/990ad272f66fe6a50087ad044725bb0f9f8e181d
commit: 990ad272f66fe6a50087ad044725bb0f9f8e181d
branch: main
author: Bénédikt Tran <10796600+picn...@users.noreply.github.com>
committer: picnixz <10796600+picn...@users.noreply.github.com>
date: 2025-03-02T12:41:56+01:00
summary:

gh-89083: add support for UUID version 6 (RFC 9562) (#120650)

Add support for generating UUIDv6 objects according to RFC 9562, §5.6 [1].

The functionality is provided by the `uuid.uuid6()` function, which takes as
inputs an optional 48-bit hardware address and an optional 14-bit clock
sequence. The UUIDv6 temporal fields are ordered differently from those of
UUIDv1, thereby providing improved database locality.

[1]: https://www.rfc-editor.org/rfc/rfc9562.html#section-5.6

---------

Co-authored-by: Hugo van Kemenade <1324225+hug...@users.noreply.github.com>
Co-authored-by: Victor Stinner <vstin...@python.org>

files:
A Misc/NEWS.d/next/Library/2024-06-17-17-31-27.gh-issue-89083.nW00Yq.rst
M Doc/library/uuid.rst
M Doc/whatsnew/3.14.rst
M Lib/test/test_uuid.py
M Lib/uuid.py

diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst
index c661fa2e52565c..88e5fae70b76d9 100644
--- a/Doc/library/uuid.rst
+++ b/Doc/library/uuid.rst
@@ -12,8 +12,8 @@
 
 This module provides immutable :class:`UUID` objects (the :class:`UUID` class)
 and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5`,
-and :func:`uuid.uuid8` for generating version 1, 3, 4, 5, and 8 UUIDs as
-specified in :rfc:`9562` (which supersedes :rfc:`4122`).
+:func:`uuid6`, and :func:`uuid8` for generating version 1, 3, 4, 5, 6,
+and 8 UUIDs as specified in :rfc:`9562` (which supersedes :rfc:`4122`).
 
 If all you want is a unique ID, you should probably call :func:`uuid1` or
 :func:`uuid4`.  Note that :func:`uuid1` may compromise privacy since it creates
@@ -153,8 +153,8 @@ which relays any information about the UUID's safety, using this enumeration:
    The UUID version number (1 through 8, meaningful only when the variant is
    :const:`RFC_4122`).
 
-   .. versionchanged:: 3.14
-      Added UUID version 8.
+   .. versionchanged:: next
+      Added UUID versions 6 and 8.
 
 
 .. attribute:: UUID.is_safe
@@ -212,6 +212,22 @@ The :mod:`uuid` module defines the following functions:
    that will be encoded using UTF-8).
 
 
+.. function:: uuid6(node=None, clock_seq=None)
+
+   Generate a UUID from a sequence number and the current time according to
+   :rfc:`9562`.
+   This is an alternative to :func:`uuid1` to improve database locality.
+
+   When *node* is not specified, :func:`getnode` is used to obtain the hardware
+   address as a 48-bit positive integer. When a sequence number *clock_seq* is
+   not specified, a pseudo-random 14-bit positive integer is generated.
+
+   If *node* or *clock_seq* exceed their expected bit count, only their least
+   significant bits are kept.
+
+   .. versionadded:: next
+
+
 .. function:: uuid8(a=None, b=None, c=None)
 
    Generate a pseudo-random UUID according to
@@ -314,7 +330,7 @@ The :mod:`uuid` module can be executed as a script from the command line.
 
 .. code-block:: sh
 
-   python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid8}] [-n NAMESPACE] [-N NAME]
+   python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid6,uuid8}] [-n NAMESPACE] [-N NAME]
 
 The following options are accepted:
 
@@ -330,8 +346,8 @@ The following options are accepted:
    Specify the function name to use to generate the uuid. By default :func:`uuid4`
    is used.
 
-   .. versionadded:: 3.14
-      Allow generating UUID version 8.
+   .. versionchanged:: next
+      Allow generating UUID versions 6 and 8.
 
 .. option:: -n <namespace>
             --namespace <namespace>
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index dbd59a9d7be150..ce75b5fffc0a4c 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -919,8 +919,8 @@ urllib
 uuid
 ----
 
-* Add support for UUID version 8 via :func:`uuid.uuid8` as specified
-  in :rfc:`9562`.
+* Add support for UUID versions 6 and 8 via :func:`uuid.uuid6` and
+  :func:`uuid.uuid8` respectively, as specified in :rfc:`9562`.
   (Contributed by Bénédikt Tran in :gh:`89083`.)
 
 * :const:`uuid.NIL` and :const:`uuid.MAX` are now available to represent the
diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py
index 8216c4dd00e35a..e284de93fbdfd1 100755
--- a/Lib/test/test_uuid.py
+++ b/Lib/test/test_uuid.py
@@ -1,6 +1,3 @@
-import unittest
-from test import support
-from test.support import import_helper
 import builtins
 import contextlib
 import copy
@@ -10,10 +7,14 @@
 import pickle
 import random
 import sys
+import unittest
 import weakref
 from itertools import product
 from unittest import mock
 
+from test import support
+from test.support import import_helper
+
 py_uuid = import_helper.import_fresh_module('uuid', blocked=['_uuid'])
 c_uuid = import_helper.import_fresh_module('uuid', fresh=['_uuid'])
 
@@ -724,6 +725,152 @@ def test_uuid5(self):
             equal(u, self.uuid.UUID(v))
             equal(str(u), v)
 
+    def test_uuid6(self):
+        equal = self.assertEqual
+        u = self.uuid.uuid6()
+        equal(u.variant, self.uuid.RFC_4122)
+        equal(u.version, 6)
+
+        fake_nanoseconds = 0x1571_20a1_de1a_c533
+        fake_node_value = 0x54e1_acf6_da7f
+        fake_clock_seq = 0x14c5
+        with (
+            mock.patch.object(self.uuid, '_last_timestamp_v6', None),
+            mock.patch.object(self.uuid, 'getnode', return_value=fake_node_value),
+            mock.patch('time.time_ns', return_value=fake_nanoseconds),
+            mock.patch('random.getrandbits', return_value=fake_clock_seq)
+        ):
+            u = self.uuid.uuid6()
+            equal(u.variant, self.uuid.RFC_4122)
+            equal(u.version, 6)
+
+            # 32 (top) | 16 (mid) | 12 (low) == 60 (timestamp)
+            equal(u.time, 0x1e901fca_7a55_b92)
+            equal(u.fields[0], 0x1e901fca)  # 32 top bits of time
+            equal(u.fields[1], 0x7a55)  # 16 mid bits of time
+            # 4 bits of version + 12 low bits of time
+            equal((u.fields[2] >> 12) & 0xf, 6)
+            equal((u.fields[2] & 0xfff), 0xb92)
+            # 2 bits of variant + 6 high bits of clock_seq
+            equal((u.fields[3] >> 6) & 0xf, 2)
+            equal(u.fields[3] & 0x3f, fake_clock_seq >> 8)
+            # 8 low bits of clock_seq
+            equal(u.fields[4], fake_clock_seq & 0xff)
+            equal(u.fields[5], fake_node_value)
+
+    def test_uuid6_uniqueness(self):
+        # Test that UUIDv6-generated values are unique.
+
+        # Unlike UUIDv8, only 62 bits can be randomized for UUIDv6.
+        # In practice, however, it remains unlikely to generate two
+        # identical UUIDs for the same 60-bit timestamp if neither
+        # the node ID nor the clock sequence is specified.
+        uuids = {self.uuid.uuid6() for _ in range(1000)}
+        self.assertEqual(len(uuids), 1000)
+        versions = {u.version for u in uuids}
+        self.assertSetEqual(versions, {6})
+
+        timestamp = 0x1ec9414c_232a_b00
+        fake_nanoseconds = (timestamp - 0x1b21dd21_3814_000) * 100
+
+        with mock.patch('time.time_ns', return_value=fake_nanoseconds):
+            def gen():
+                with mock.patch.object(self.uuid, '_last_timestamp_v6', None):
+                    return self.uuid.uuid6(node=0, clock_seq=None)
+
+            # By the birthday paradox, sampling N = 1024 UUIDs with identical
+            # node IDs and timestamps results in duplicates with probability
+            # close to 1 (not having a duplicate happens with probability of
+            # order 1E-15) since only the 14-bit clock sequence is randomized.
+            N = 1024
+            uuids = {gen() for _ in range(N)}
+            self.assertSetEqual({u.node for u in uuids}, {0})
+            self.assertSetEqual({u.time for u in uuids}, {timestamp})
+            self.assertLess(len(uuids), N, 'collision property does not hold')
+
+    def test_uuid6_node(self):
+        # Make sure the given node ID appears in the UUID.
+        #
+        # Note: when no node ID is specified, the same logic as for UUIDv1
+        # is applied to UUIDv6. In particular, there is no need to test that
+        # getnode() correctly returns positive integers of exactly 48 bits
+        # since this is done in test_uuid1_eui64().
+        self.assertLessEqual(self.uuid.uuid6().node.bit_length(), 48)
+
+        self.assertEqual(self.uuid.uuid6(0).node, 0)
+
+        # tests with explicit values
+        max_node = 0xffff_ffff_ffff
+        self.assertEqual(self.uuid.uuid6(max_node).node, max_node)
+        big_node = 0xE_1234_5678_ABCD  # 52-bit node
+        res_node = 0x0_1234_5678_ABCD  # truncated to 48 bits
+        self.assertEqual(self.uuid.uuid6(big_node).node, res_node)
+
+        # randomized tests
+        for _ in range(10):
+            # node with > 48 bits is truncated
+            for b in [24, 48, 72]:
+                node = (1 << (b - 1)) | random.getrandbits(b)
+                with self.subTest(node=node, bitlen=b):
+                    self.assertEqual(node.bit_length(), b)
+                    u = self.uuid.uuid6(node=node)
+                    self.assertEqual(u.node, node & 0xffff_ffff_ffff)
+
+    def test_uuid6_clock_seq(self):
+        # Make sure the supplied clock sequence appears in the UUID.
+        #
+        # For UUIDv6, clock sequence bits are stored from bit 48 to bit 61,
+        # with the convention that the least significant bit is bit 0 and
+        # the most significant bit is bit 127.
+        get_clock_seq = lambda u: (u.int >> 48) & 0x3fff
+
+        u = self.uuid.uuid6()
+        self.assertLessEqual(get_clock_seq(u).bit_length(), 14)
+
+        # tests with explicit values
+        big_clock_seq = 0xffff  # 16-bit clock sequence
+        res_clock_seq = 0x3fff  # truncated to 14 bits
+        u = self.uuid.uuid6(clock_seq=big_clock_seq)
+        self.assertEqual(get_clock_seq(u), res_clock_seq)
+
+        # some randomized tests
+        for _ in range(10):
+            # clock_seq with > 14 bits is truncated
+            for b in [7, 14, 28]:
+                node = random.getrandbits(48)
+                clock_seq = (1 << (b - 1)) | random.getrandbits(b)
+                with self.subTest(node=node, clock_seq=clock_seq, bitlen=b):
+                    self.assertEqual(clock_seq.bit_length(), b)
+                    u = self.uuid.uuid6(node=node, clock_seq=clock_seq)
+                    self.assertEqual(get_clock_seq(u), clock_seq & 0x3fff)
+
+    def test_uuid6_test_vectors(self):
+        equal = self.assertEqual
+        # https://www.rfc-editor.org/rfc/rfc9562#name-test-vectors
+        # (separators are put at the 12th and 28th bits)
+        timestamp = 0x1ec9414c_232a_b00
+        fake_nanoseconds = (timestamp - 0x1b21dd21_3814_000) * 100
+        # https://www.rfc-editor.org/rfc/rfc9562#name-example-of-a-uuidv6-value
+        node = 0x9f6bdeced846
+        clock_seq = (3 << 12) | 0x3c8
+
+        with (
+            mock.patch.object(self.uuid, '_last_timestamp_v6', None),
+            mock.patch('time.time_ns', return_value=fake_nanoseconds)
+        ):
+            u = self.uuid.uuid6(node=node, clock_seq=clock_seq)
+            equal(str(u).upper(), '1EC9414C-232A-6B00-B3C8-9F6BDECED846')
+            #   32          16      4      12       2      14         48
+            # time_hi | time_mid | ver | time_lo | var | clock_seq | node
+            equal(u.time, timestamp)
+            equal(u.int & 0xffff_ffff_ffff, node)
+            equal((u.int >> 48) & 0x3fff, clock_seq)
+            equal((u.int >> 62) & 0x3, 0b10)
+            equal((u.int >> 64) & 0xfff, 0xb00)
+            equal((u.int >> 76) & 0xf, 0x6)
+            equal((u.int >> 80) & 0xffff, 0x232a)
+            equal((u.int >> 96) & 0xffff_ffff, 0x1ec9_414c)
+
     def test_uuid8(self):
         equal = self.assertEqual
         u = self.uuid.uuid8()
diff --git a/Lib/uuid.py b/Lib/uuid.py
index 36809b85cb8ceb..ed69b4de07b53f 100644
--- a/Lib/uuid.py
+++ b/Lib/uuid.py
@@ -1,8 +1,8 @@
 r"""UUID objects (universally unique identifiers) according to RFC 4122/9562.
 
 This module provides immutable UUID objects (class UUID) and the functions
-uuid1(), uuid3(), uuid4(), uuid5(), and uuid8() for generating version 1, 3,
-4, 5, and 8 UUIDs as specified in RFC 4122/9562.
+uuid1(), uuid3(), uuid4(), uuid5(), uuid6(), and uuid8() for generating
+version 1, 3, 4, 5, 6, and 8 UUIDs as specified in RFC 4122/9562.
 
 If all you want is a unique ID, you should probably call uuid1() or uuid4().
 Note that uuid1() may compromise privacy since it creates a UUID containing
@@ -101,6 +101,7 @@ class SafeUUID:
 _RFC_4122_VERSION_3_FLAGS = ((3 << 76) | (0x8000 << 48))
 _RFC_4122_VERSION_4_FLAGS = ((4 << 76) | (0x8000 << 48))
 _RFC_4122_VERSION_5_FLAGS = ((5 << 76) | (0x8000 << 48))
+_RFC_4122_VERSION_6_FLAGS = ((6 << 76) | (0x8000 << 48))
 _RFC_4122_VERSION_8_FLAGS = ((8 << 76) | (0x8000 << 48))
 
 
@@ -127,7 +128,9 @@ class UUID:
 
         fields      a tuple of the six integer fields of the UUID,
                     which are also available as six individual attributes
-                    and two derived attributes:
+                    and two derived attributes. The time_* attributes are
+                    only relevant to version 1, while the others are only
+                    relevant to versions 1 and 6:
 
             time_low                the first 32 bits of the UUID
             time_mid                the next 16 bits of the UUID
@@ -353,8 +356,19 @@ def clock_seq_low(self):
 
     @property
     def time(self):
-        return (((self.time_hi_version & 0x0fff) << 48) |
-                (self.time_mid << 32) | self.time_low)
+        if self.version == 6:
+            # time_hi (32) | time_mid (16) | ver (4) | time_lo (12) | ... (64)
+            time_hi = self.int >> 96
+            time_lo = (self.int >> 64) & 0x0fff
+            return time_hi << 28 | (self.time_mid << 12) | time_lo
+        else:
+            # time_lo (32) | time_mid (16) | ver (4) | time_hi (12) | ... (64)
+            #
+            # For compatibility purposes, we do not warn or raise when the
+            # version is not 1 (timestamp is irrelevant to other versions).
+            time_hi = (self.int >> 64) & 0x0fff
+            time_lo = self.int >> 96
+            return time_hi << 48 | (self.time_mid << 32) | time_lo
 
     @property
     def clock_seq(self):
@@ -756,6 +770,44 @@ def uuid5(namespace, name):
     int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS
     return UUID._from_int(int_uuid_5)
 
+_last_timestamp_v6 = None
+
+def uuid6(node=None, clock_seq=None):
+    """Similar to :func:`uuid1` but where fields are ordered differently
+    for improved DB locality.
+
+    More precisely, given a 60-bit timestamp value as specified for UUIDv1,
+    for UUIDv6 the first 48 most significant bits are stored first, followed
+    by the 4-bit version (same position), followed by the remaining 12 bits
+    of the original 60-bit timestamp.
+    """
+    global _last_timestamp_v6
+    import time
+    nanoseconds = time.time_ns()
+    # 0x01b21dd213814000 is the number of 100-ns intervals between the
+    # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
+    timestamp = nanoseconds // 100 + 0x01b21dd213814000
+    if _last_timestamp_v6 is not None and timestamp <= _last_timestamp_v6:
+        timestamp = _last_timestamp_v6 + 1
+    _last_timestamp_v6 = timestamp
+    if clock_seq is None:
+        import random
+        clock_seq = random.getrandbits(14)  # instead of stable storage
+    time_hi_and_mid = (timestamp >> 12) & 0xffff_ffff_ffff
+    time_lo = timestamp & 0x0fff  # keep 12 bits and clear version bits
+    clock_s = clock_seq & 0x3fff  # keep 14 bits and clear variant bits
+    if node is None:
+        node = getnode()
+    # --- 32 + 16 ---   -- 4 --   -- 12 --  -- 2 --   -- 14 ---    48
+    # time_hi_and_mid | version | time_lo | variant | clock_seq | node
+    int_uuid_6 = time_hi_and_mid << 80
+    int_uuid_6 |= time_lo << 64
+    int_uuid_6 |= clock_s << 48
+    int_uuid_6 |= node & 0xffff_ffff_ffff
+    # by construction, the variant and version bits are already cleared
+    int_uuid_6 |= _RFC_4122_VERSION_6_FLAGS
+    return UUID._from_int(int_uuid_6)
+
 def uuid8(a=None, b=None, c=None):
     """Generate a UUID from three custom blocks.
 
@@ -788,6 +840,7 @@ def main():
         "uuid3": uuid3,
         "uuid4": uuid4,
         "uuid5": uuid5,
+        "uuid6": uuid6,
         "uuid8": uuid8,
     }
     uuid_namespace_funcs = ("uuid3", "uuid5")
diff --git a/Misc/NEWS.d/next/Library/2024-06-17-17-31-27.gh-issue-89083.nW00Yq.rst b/Misc/NEWS.d/next/Library/2024-06-17-17-31-27.gh-issue-89083.nW00Yq.rst
new file mode 100644
index 00000000000000..f4bda53d1a67d5
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-06-17-17-31-27.gh-issue-89083.nW00Yq.rst
@@ -0,0 +1,2 @@
+Add :func:`uuid.uuid6` for generating UUIDv6 objects as specified in
+:rfc:`9562`. Patch by Bénédikt Tran.

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to