https://github.com/python/cpython/commit/990ad272f66fe6a50087ad044725bb0f9f8e181d commit: 990ad272f66fe6a50087ad044725bb0f9f8e181d branch: main author: Bénédikt Tran <10796600+picn...@users.noreply.github.com> committer: picnixz <10796600+picn...@users.noreply.github.com> date: 2025-03-02T12:41:56+01:00 summary:
gh-89083: add support for UUID version 6 (RFC 9562) (#120650) Add support for generating UUIDv6 objects according to RFC 9562, §5.6 [1]. The functionality is provided by the `uuid.uuid6()` function which takes as inputs an optional 48-bit hardware address and an optional 14-bit clock sequence. The UUIDv6 temporal fields are ordered differently than those of UUIDv1, thereby providing improved database locality. [1]: https://www.rfc-editor.org/rfc/rfc9562.html#section-5.6 --------- Co-authored-by: Hugo van Kemenade <1324225+hug...@users.noreply.github.com> Co-authored-by: Victor Stinner <vstin...@python.org> files: A Misc/NEWS.d/next/Library/2024-06-17-17-31-27.gh-issue-89083.nW00Yq.rst M Doc/library/uuid.rst M Doc/whatsnew/3.14.rst M Lib/test/test_uuid.py M Lib/uuid.py diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index c661fa2e52565c..88e5fae70b76d9 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -12,8 +12,8 @@ This module provides immutable :class:`UUID` objects (the :class:`UUID` class) and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5`, -and :func:`uuid.uuid8` for generating version 1, 3, 4, 5, and 8 UUIDs as -specified in :rfc:`9562` (which supersedes :rfc:`4122`). +:func:`uuid6`, and :func:`uuid8` for generating version 1, 3, 4, 5, 6, +and 8 UUIDs as specified in :rfc:`9562` (which supersedes :rfc:`4122`). If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -153,8 +153,8 @@ which relays any information about the UUID's safety, using this enumeration: The UUID version number (1 through 8, meaningful only when the variant is :const:`RFC_4122`). - .. versionchanged:: 3.14 - Added UUID version 8. + .. versionchanged:: next + Added UUID versions 6 and 8. .. attribute:: UUID.is_safe @@ -212,6 +212,22 @@ The :mod:`uuid` module defines the following functions: that will be encoded using UTF-8). +.. function:: uuid6(node=None, clock_seq=None) + + Generate a UUID from a sequence number and the current time according to + :rfc:`9562`. + This is an alternative to :func:`uuid1` to improve database locality. + + When *node* is not specified, :func:`getnode` is used to obtain the hardware + address as a 48-bit positive integer. When a sequence number *clock_seq* is + not specified, a pseudo-random 14-bit positive integer is generated. + + If *node* or *clock_seq* exceed their expected bit count, only their least + significant bits are kept. + + .. versionadded:: next + + .. function:: uuid8(a=None, b=None, c=None) Generate a pseudo-random UUID according to @@ -314,7 +330,7 @@ The :mod:`uuid` module can be executed as a script from the command line. .. code-block:: sh - python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid8}] [-n NAMESPACE] [-N NAME] + python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid6,uuid8}] [-n NAMESPACE] [-N NAME] The following options are accepted: @@ -330,8 +346,8 @@ The following options are accepted: Specify the function name to use to generate the uuid. By default :func:`uuid4` is used. - .. versionadded:: 3.14 - Allow generating UUID version 8. + .. versionchanged:: next + Allow generating UUID versions 6 and 8. .. option:: -n <namespace> --namespace <namespace> diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index dbd59a9d7be150..ce75b5fffc0a4c 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -919,8 +919,8 @@ urllib uuid ---- -* Add support for UUID version 8 via :func:`uuid.uuid8` as specified - in :rfc:`9562`. +* Add support for UUID versions 6 and 8 via :func:`uuid.uuid6` and + :func:`uuid.uuid8` respectively, as specified in :rfc:`9562`. (Contributed by Bénédikt Tran in :gh:`89083`.) * :const:`uuid.NIL` and :const:`uuid.MAX` are now available to represent the diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 8216c4dd00e35a..e284de93fbdfd1 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -1,6 +1,3 @@ -import unittest -from test import support -from test.support import import_helper import builtins import contextlib import copy @@ -10,10 +7,14 @@ import pickle import random import sys +import unittest import weakref from itertools import product from unittest import mock +from test import support +from test.support import import_helper + py_uuid = import_helper.import_fresh_module('uuid', blocked=['_uuid']) c_uuid = import_helper.import_fresh_module('uuid', fresh=['_uuid']) @@ -724,6 +725,152 @@ def test_uuid5(self): equal(u, self.uuid.UUID(v)) equal(str(u), v) + def test_uuid6(self): + equal = self.assertEqual + u = self.uuid.uuid6() + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 6) + + fake_nanoseconds = 0x1571_20a1_de1a_c533 + fake_node_value = 0x54e1_acf6_da7f + fake_clock_seq = 0x14c5 + with ( + mock.patch.object(self.uuid, '_last_timestamp_v6', None), + mock.patch.object(self.uuid, 'getnode', return_value=fake_node_value), + mock.patch('time.time_ns', return_value=fake_nanoseconds), + mock.patch('random.getrandbits', return_value=fake_clock_seq) + ): + u = self.uuid.uuid6() + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 6) + + # 32 (top) | 16 (mid) | 12 (low) == 60 (timestamp) + equal(u.time, 0x1e901fca_7a55_b92) + equal(u.fields[0], 0x1e901fca) # 32 top bits of time + equal(u.fields[1], 0x7a55) # 16 mid bits of time + # 4 bits of version + 12 low bits of time + equal((u.fields[2] >> 12) & 0xf, 6) + equal((u.fields[2] & 0xfff), 0xb92) + # 2 bits of variant + 6 high bits of clock_seq + equal((u.fields[3] >> 6) & 0xf, 2) + equal(u.fields[3] & 0x3f, fake_clock_seq >> 8) + # 8 low bits of clock_seq + equal(u.fields[4], fake_clock_seq & 0xff) + equal(u.fields[5], fake_node_value) + + def test_uuid6_uniqueness(self): + # Test that UUIDv6-generated values are unique. + + # Unlike UUIDv8, only 62 bits can be randomized for UUIDv6. + # In practice, however, it remains unlikely to generate two + # identical UUIDs for the same 60-bit timestamp if neither + # the node ID nor the clock sequence is specified. + uuids = {self.uuid.uuid6() for _ in range(1000)} + self.assertEqual(len(uuids), 1000) + versions = {u.version for u in uuids} + self.assertSetEqual(versions, {6}) + + timestamp = 0x1ec9414c_232a_b00 + fake_nanoseconds = (timestamp - 0x1b21dd21_3814_000) * 100 + + with mock.patch('time.time_ns', return_value=fake_nanoseconds): + def gen(): + with mock.patch.object(self.uuid, '_last_timestamp_v6', None): + return self.uuid.uuid6(node=0, clock_seq=None) + + # By the birthday paradox, sampling N = 1024 UUIDs with identical + # node IDs and timestamps results in duplicates with probability + # close to 1 (not having a duplicate happens with probability of + # order 1E-15) since only the 14-bit clock sequence is randomized. + N = 1024 + uuids = {gen() for _ in range(N)} + self.assertSetEqual({u.node for u in uuids}, {0}) + self.assertSetEqual({u.time for u in uuids}, {timestamp}) + self.assertLess(len(uuids), N, 'collision property does not hold') + + def test_uuid6_node(self): + # Make sure the given node ID appears in the UUID. + # + # Note: when no node ID is specified, the same logic as for UUIDv1 + # is applied to UUIDv6. In particular, there is no need to test that + # getnode() correctly returns positive integers of exactly 48 bits + # since this is done in test_uuid1_eui64(). + self.assertLessEqual(self.uuid.uuid6().node.bit_length(), 48) + + self.assertEqual(self.uuid.uuid6(0).node, 0) + + # tests with explicit values + max_node = 0xffff_ffff_ffff + self.assertEqual(self.uuid.uuid6(max_node).node, max_node) + big_node = 0xE_1234_5678_ABCD # 52-bit node + res_node = 0x0_1234_5678_ABCD # truncated to 48 bits + self.assertEqual(self.uuid.uuid6(big_node).node, res_node) + + # randomized tests + for _ in range(10): + # node with > 48 bits is truncated + for b in [24, 48, 72]: + node = (1 << (b - 1)) | random.getrandbits(b) + with self.subTest(node=node, bitlen=b): + self.assertEqual(node.bit_length(), b) + u = self.uuid.uuid6(node=node) + self.assertEqual(u.node, node & 0xffff_ffff_ffff) + + def test_uuid6_clock_seq(self): + # Make sure the supplied clock sequence appears in the UUID. + # + # For UUIDv6, clock sequence bits are stored from bit 48 to bit 62, + # with the convention that the least significant bit is bit 0 and + # the most significant bit is bit 127. + get_clock_seq = lambda u: (u.int >> 48) & 0x3fff + + u = self.uuid.uuid6() + self.assertLessEqual(get_clock_seq(u).bit_length(), 14) + + # tests with explicit values + big_clock_seq = 0xffff # 16-bit clock sequence + res_clock_seq = 0x3fff # truncated to 14 bits + u = self.uuid.uuid6(clock_seq=big_clock_seq) + self.assertEqual(get_clock_seq(u), res_clock_seq) + + # some randomized tests + for _ in range(10): + # clock_seq with > 14 bits is truncated + for b in [7, 14, 28]: + node = random.getrandbits(48) + clock_seq = (1 << (b - 1)) | random.getrandbits(b) + with self.subTest(node=node, clock_seq=clock_seq, bitlen=b): + self.assertEqual(clock_seq.bit_length(), b) + u = self.uuid.uuid6(node=node, clock_seq=clock_seq) + self.assertEqual(get_clock_seq(u), clock_seq & 0x3fff) + + def test_uuid6_test_vectors(self): + equal = self.assertEqual + # https://www.rfc-editor.org/rfc/rfc9562#name-test-vectors + # (separators are put at the 12th and 28th bits) + timestamp = 0x1ec9414c_232a_b00 + fake_nanoseconds = (timestamp - 0x1b21dd21_3814_000) * 100 + # https://www.rfc-editor.org/rfc/rfc9562#name-example-of-a-uuidv6-value + node = 0x9f6bdeced846 + clock_seq = (3 << 12) | 0x3c8 + + with ( + mock.patch.object(self.uuid, '_last_timestamp_v6', None), + mock.patch('time.time_ns', return_value=fake_nanoseconds) + ): + u = self.uuid.uuid6(node=node, clock_seq=clock_seq) + equal(str(u).upper(), '1EC9414C-232A-6B00-B3C8-9F6BDECED846') + # 32 16 4 12 2 14 48 + # time_hi | time_mid | ver | time_lo | var | clock_seq | node + equal(u.time, timestamp) + equal(u.int & 0xffff_ffff_ffff, node) + equal((u.int >> 48) & 0x3fff, clock_seq) + equal((u.int >> 62) & 0x3, 0b10) + equal((u.int >> 64) & 0xfff, 0xb00) + equal((u.int >> 76) & 0xf, 0x6) + equal((u.int >> 80) & 0xffff, 0x232a) + equal((u.int >> 96) & 0xffff_ffff, 0x1ec9_414c) + def test_uuid8(self): equal = self.assertEqual u = self.uuid.uuid8() diff --git a/Lib/uuid.py b/Lib/uuid.py index 36809b85cb8ceb..ed69b4de07b53f 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -1,8 +1,8 @@ r"""UUID objects (universally unique identifiers) according to RFC 4122/9562. This module provides immutable UUID objects (class UUID) and the functions -uuid1(), uuid3(), uuid4(), uuid5(), and uuid8() for generating version 1, 3, -4, 5, and 8 UUIDs as specified in RFC 4122/9562. +uuid1(), uuid3(), uuid4(), uuid5(), uuid6(), and uuid8() for generating +version 1, 3, 4, 5, 6, and 8 UUIDs as specified in RFC 4122/9562. If all you want is a unique ID, you should probably call uuid1() or uuid4(). Note that uuid1() may compromise privacy since it creates a UUID containing @@ -101,6 +101,7 @@ class SafeUUID: _RFC_4122_VERSION_3_FLAGS = ((3 << 76) | (0x8000 << 48)) _RFC_4122_VERSION_4_FLAGS = ((4 << 76) | (0x8000 << 48)) _RFC_4122_VERSION_5_FLAGS = ((5 << 76) | (0x8000 << 48)) +_RFC_4122_VERSION_6_FLAGS = ((6 << 76) | (0x8000 << 48)) _RFC_4122_VERSION_8_FLAGS = ((8 << 76) | (0x8000 << 48)) @@ -127,7 +128,9 @@ class UUID: fields a tuple of the six integer fields of the UUID, which are also available as six individual attributes - and two derived attributes: + and two derived attributes. The time_* attributes are + only relevant to version 1, while the others are only + relevant to versions 1 and 6: time_low the first 32 bits of the UUID time_mid the next 16 bits of the UUID @@ -353,8 +356,19 @@ def clock_seq_low(self): @property def time(self): - return (((self.time_hi_version & 0x0fff) << 48) | - (self.time_mid << 32) | self.time_low) + if self.version == 6: + # time_hi (32) | time_mid (16) | ver (4) | time_lo (12) | ... (64) + time_hi = self.int >> 96 + time_lo = (self.int >> 64) & 0x0fff + return time_hi << 28 | (self.time_mid << 12) | time_lo + else: + # time_lo (32) | time_mid (16) | ver (4) | time_hi (12) | ... (64) + # + # For compatibility purposes, we do not warn or raise when the + # version is not 1 (timestamp is irrelevant to other versions). + time_hi = (self.int >> 64) & 0x0fff + time_lo = self.int >> 96 + return time_hi << 48 | (self.time_mid << 32) | time_lo @property def clock_seq(self): @@ -756,6 +770,44 @@ def uuid5(namespace, name): int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS return UUID._from_int(int_uuid_5) +_last_timestamp_v6 = None + +def uuid6(node=None, clock_seq=None): + """Similar to :func:`uuid1` but where fields are ordered differently + for improved DB locality. + + More precisely, given a 60-bit timestamp value as specified for UUIDv1, + for UUIDv6 the first 48 most significant bits are stored first, followed + by the 4-bit version (same position), followed by the remaining 12 bits + of the original 60-bit timestamp. + """ + global _last_timestamp_v6 + import time + nanoseconds = time.time_ns() + # 0x01b21dd213814000 is the number of 100-ns intervals between the + # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00. + timestamp = nanoseconds // 100 + 0x01b21dd213814000 + if _last_timestamp_v6 is not None and timestamp <= _last_timestamp_v6: + timestamp = _last_timestamp_v6 + 1 + _last_timestamp_v6 = timestamp + if clock_seq is None: + import random + clock_seq = random.getrandbits(14) # instead of stable storage + time_hi_and_mid = (timestamp >> 12) & 0xffff_ffff_ffff + time_lo = timestamp & 0x0fff # keep 12 bits and clear version bits + clock_s = clock_seq & 0x3fff # keep 14 bits and clear variant bits + if node is None: + node = getnode() + # --- 32 + 16 --- -- 4 -- -- 12 -- -- 2 -- -- 14 --- 48 + # time_hi_and_mid | version | time_lo | variant | clock_seq | node + int_uuid_6 = time_hi_and_mid << 80 + int_uuid_6 |= time_lo << 64 + int_uuid_6 |= clock_s << 48 + int_uuid_6 |= node & 0xffff_ffff_ffff + # by construction, the variant and version bits are already cleared + int_uuid_6 |= _RFC_4122_VERSION_6_FLAGS + return UUID._from_int(int_uuid_6) + def uuid8(a=None, b=None, c=None): """Generate a UUID from three custom blocks. @@ -788,6 +840,7 @@ def main(): "uuid3": uuid3, "uuid4": uuid4, "uuid5": uuid5, + "uuid6": uuid6, "uuid8": uuid8, } uuid_namespace_funcs = ("uuid3", "uuid5") diff --git a/Misc/NEWS.d/next/Library/2024-06-17-17-31-27.gh-issue-89083.nW00Yq.rst b/Misc/NEWS.d/next/Library/2024-06-17-17-31-27.gh-issue-89083.nW00Yq.rst new file mode 100644 index 00000000000000..f4bda53d1a67d5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-17-17-31-27.gh-issue-89083.nW00Yq.rst @@ -0,0 +1,2 @@ +Add :func:`uuid.uuid6` for generating UUIDv6 objects as specified in +:rfc:`9562`. Patch by Bénédikt Tran. _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com