https://github.com/python/cpython/commit/3929af5e3a203291dc07b40c9c3515492e3ba7b4 commit: 3929af5e3a203291dc07b40c9c3515492e3ba7b4 branch: main author: Bénédikt Tran <10796600+picn...@users.noreply.github.com> committer: picnixz <10796600+picn...@users.noreply.github.com> date: 2025-03-04T10:47:19+01:00 summary:
gh-89083: add support for UUID version 7 (RFC 9562) (#121119) Add support for generating UUIDv7 objects according to RFC 9562, §5.7 [1]. The functionality is provided by the `uuid.uuid7()` function. The implementation is based on a 42-bit counter as described by Method 1, §6.2 [2] and guarantees monotonicity within the same millisecond. [1]: https://www.rfc-editor.org/rfc/rfc9562.html#section-5.7 [2]: https://www.rfc-editor.org/rfc/rfc9562.html#section-6.2 --------- Co-authored-by: Hugo van Kemenade <1324225+hug...@users.noreply.github.com> Co-authored-by: Victor Stinner <vstin...@python.org> Co-authored-by: Éric <mer...@netwok.org> files: A Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst M Doc/library/uuid.rst M Doc/whatsnew/3.14.rst M Lib/test/test_uuid.py M Lib/uuid.py diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 88e5fae70b76d9..0fb29460e2e68a 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -11,9 +11,10 @@ -------------- This module provides immutable :class:`UUID` objects (the :class:`UUID` class) -and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5`, -:func:`uuid6`, and :func:`uuid8` for generating version 1, 3, 4, 5, 6, -and 8 UUIDs as specified in :rfc:`9562` (which supersedes :rfc:`4122`). +and :ref:`functions <uuid-factory-functions>` for generating UUIDs corresponding +to a specific UUID version as specified in :rfc:`9562` (which supersedes :rfc:`4122`), +for example, :func:`uuid1` for UUID version 1, :func:`uuid3` for UUID version 3, and so on. +Note that UUID version 2 is deliberately omitted as it is outside the scope of the RFC. If all you want is a unique ID, you should probably call :func:`uuid1` or :func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates @@ -154,7 +155,7 @@ which relays any information about the UUID's safety, using this enumeration: :const:`RFC_4122`). .. versionchanged:: next - Added UUID versions 6 and 8. + Added UUID versions 6, 7 and 8. .. attribute:: UUID.is_safe @@ -185,6 +186,8 @@ The :mod:`uuid` module defines the following functions: globally unique, while the latter are not. +.. _uuid-factory-functions: + .. function:: uuid1(node=None, clock_seq=None) Generate a UUID from a host ID, sequence number, and the current time. If *node* @@ -228,6 +231,18 @@ The :mod:`uuid` module defines the following functions: .. versionadded:: next +.. function:: uuid7() + + Generate a time-based UUID according to + :rfc:`RFC 9562, §5.7 <9562#section-5.7>`. + + For portability across platforms lacking sub-millisecond precision, UUIDs + produced by this function embed a 48-bit timestamp and use a 42-bit counter + to guarantee monotonicity within a millisecond. + + .. versionadded:: next + + .. function:: uuid8(a=None, b=None, c=None) Generate a pseudo-random UUID according to @@ -330,7 +345,7 @@ The :mod:`uuid` module can be executed as a script from the command line. .. code-block:: sh - python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid6,uuid8}] [-n NAMESPACE] [-N NAME] + python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid6,uuid7,uuid8}] [-n NAMESPACE] [-N NAME] The following options are accepted: @@ -347,7 +362,7 @@ The following options are accepted: is used. .. versionchanged:: next - Allow generating UUID versions 6 and 8. + Allow generating UUID versions 6, 7 and 8. .. option:: -n <namespace> --namespace <namespace> diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 2875913e4abdf1..0721ea5cb8d5d1 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -924,8 +924,9 @@ urllib uuid ---- -* Add support for UUID versions 6 and 8 via :func:`uuid.uuid6` and - :func:`uuid.uuid8` respectively, as specified in :rfc:`9562`. +* Add support for UUID versions 6, 7, and 8 via :func:`uuid.uuid6`, + :func:`uuid.uuid7`, and :func:`uuid.uuid8` respectively, as specified + in :rfc:`9562`. (Contributed by Bénédikt Tran in :gh:`89083`.) * :const:`uuid.NIL` and :const:`uuid.MAX` are now available to represent the diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index e284de93fbdfd1..bcc673233f6588 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -871,6 +871,206 @@ def test_uuid6_test_vectors(self): equal((u.int >> 80) & 0xffff, 0x232a) equal((u.int >> 96) & 0xffff_ffff, 0x1ec9_414c) + def test_uuid7(self): + equal = self.assertEqual + u = self.uuid.uuid7() + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + + # 1 Jan 2023 12:34:56.123_456_789 + timestamp_ns = 1672533296_123_456_789 # ns precision + timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + + for _ in range(100): + counter_hi = random.getrandbits(11) + counter_lo = random.getrandbits(30) + counter = (counter_hi << 30) | counter_lo + + tail = random.getrandbits(32) + # effective number of bits is 32 + 30 + 11 = 73 + random_bits = counter << 32 | tail + + # set all remaining MSB of fake random bits to 1 to ensure that + # the implementation correctly removes them + random_bits = (((1 << 7) - 1) << 73) | random_bits + random_data = random_bits.to_bytes(10) + + with ( + mock.patch.multiple( + self.uuid, + _last_timestamp_v7=None, + _last_counter_v7=0, + ), + mock.patch('time.time_ns', return_value=timestamp_ns), + mock.patch('os.urandom', return_value=random_data) as urand + ): + u = self.uuid.uuid7() + urand.assert_called_once_with(10) + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + + equal(self.uuid._last_timestamp_v7, timestamp_ms) + equal(self.uuid._last_counter_v7, counter) + + unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff + equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms) + + equal((u.int >> 75) & 1, 0) # check that the MSB is 0 + equal((u.int >> 64) & 0xfff, counter_hi) + equal((u.int >> 32) & 0x3fff_ffff, counter_lo) + equal(u.int & 0xffff_ffff, tail) + + def test_uuid7_uniqueness(self): + # Test that UUIDv7-generated values are unique. + # + # While UUIDv8 has an entropy of 122 bits, those 122 bits may not + # necessarily be sampled from a PRNG. On the other hand, UUIDv7 + # uses os.urandom() as a PRNG which features better randomness. + N = 1000 + uuids = {self.uuid.uuid7() for _ in range(N)} + self.assertEqual(len(uuids), N) + + versions = {u.version for u in uuids} + self.assertSetEqual(versions, {7}) + + def test_uuid7_monotonicity(self): + equal = self.assertEqual + + us = [self.uuid.uuid7() for _ in range(10_000)] + equal(us, sorted(us)) + + with mock.patch.multiple( + self.uuid, + _last_timestamp_v7=0, + _last_counter_v7=0, + ): + # 1 Jan 2023 12:34:56.123_456_789 + timestamp_ns = 1672533296_123_456_789 # ns precision + timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + + # counter_{hi,lo} are chosen so that "counter + 1" does not overflow + counter_hi = random.getrandbits(11) + counter_lo = random.getrandbits(29) + counter = (counter_hi << 30) | counter_lo + self.assertLess(counter + 1, 0x3ff_ffff_ffff) + + tail = random.getrandbits(32) + random_bits = counter << 32 | tail + random_data = random_bits.to_bytes(10) + + with ( + mock.patch('time.time_ns', return_value=timestamp_ns), + mock.patch('os.urandom', return_value=random_data) as urand + ): + u1 = self.uuid.uuid7() + urand.assert_called_once_with(10) + equal(self.uuid._last_timestamp_v7, timestamp_ms) + equal(self.uuid._last_counter_v7, counter) + equal((u1.int >> 64) & 0xfff, counter_hi) + equal((u1.int >> 32) & 0x3fff_ffff, counter_lo) + equal(u1.int & 0xffff_ffff, tail) + + # 1 Jan 2023 12:34:56.123_457_032 (same millisecond but not same ns) + next_timestamp_ns = 1672533296_123_457_032 + next_timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + equal(timestamp_ms, next_timestamp_ms) + + next_tail_bytes = os.urandom(4) + next_fail = int.from_bytes(next_tail_bytes) + + with ( + mock.patch('time.time_ns', return_value=next_timestamp_ns), + mock.patch('os.urandom', return_value=next_tail_bytes) as urand + ): + u2 = self.uuid.uuid7() + urand.assert_called_once_with(4) + # same milli-second + equal(self.uuid._last_timestamp_v7, timestamp_ms) + # 42-bit counter advanced by 1 + equal(self.uuid._last_counter_v7, counter + 1) + equal((u2.int >> 64) & 0xfff, counter_hi) + equal((u2.int >> 32) & 0x3fff_ffff, counter_lo + 1) + equal(u2.int & 0xffff_ffff, next_fail) + + self.assertLess(u1, u2) + + def test_uuid7_timestamp_backwards(self): + equal = self.assertEqual + # 1 Jan 2023 12:34:56.123_456_789 + timestamp_ns = 1672533296_123_456_789 # ns precision + timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + fake_last_timestamp_v7 = timestamp_ms + 1 + + # counter_{hi,lo} are chosen so that "counter + 1" does not overflow + counter_hi = random.getrandbits(11) + counter_lo = random.getrandbits(29) + counter = (counter_hi << 30) | counter_lo + self.assertLess(counter + 1, 0x3ff_ffff_ffff) + + tail_bytes = os.urandom(4) + tail = int.from_bytes(tail_bytes) + + with ( + mock.patch.multiple( + self.uuid, + _last_timestamp_v7=fake_last_timestamp_v7, + _last_counter_v7=counter, + ), + mock.patch('time.time_ns', return_value=timestamp_ns), + mock.patch('os.urandom', return_value=tail_bytes) as urand + ): + u = self.uuid.uuid7() + urand.assert_called_once_with(4) + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + equal(self.uuid._last_timestamp_v7, fake_last_timestamp_v7 + 1) + unix_ts_ms = (fake_last_timestamp_v7 + 1) & 0xffff_ffff_ffff + equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms) + # 42-bit counter advanced by 1 + equal(self.uuid._last_counter_v7, counter + 1) + equal((u.int >> 64) & 0xfff, counter_hi) + # 42-bit counter advanced by 1 (counter_hi is untouched) + equal((u.int >> 32) & 0x3fff_ffff, counter_lo + 1) + equal(u.int & 0xffff_ffff, tail) + + def test_uuid7_overflow_counter(self): + equal = self.assertEqual + # 1 Jan 2023 12:34:56.123_456_789 + timestamp_ns = 1672533296_123_456_789 # ns precision + timestamp_ms, _ = divmod(timestamp_ns, 1_000_000) + + new_counter_hi = random.getrandbits(11) + new_counter_lo = random.getrandbits(30) + new_counter = (new_counter_hi << 30) | new_counter_lo + + tail = random.getrandbits(32) + random_bits = (new_counter << 32) | tail + random_data = random_bits.to_bytes(10) + + with ( + mock.patch.multiple( + self.uuid, + _last_timestamp_v7=timestamp_ms, + # same timestamp, but force an overflow on the counter + _last_counter_v7=0x3ff_ffff_ffff, + ), + mock.patch('time.time_ns', return_value=timestamp_ns), + mock.patch('os.urandom', return_value=random_data) as urand + ): + u = self.uuid.uuid7() + urand.assert_called_with(10) + equal(u.variant, self.uuid.RFC_4122) + equal(u.version, 7) + # timestamp advanced due to overflow + equal(self.uuid._last_timestamp_v7, timestamp_ms + 1) + unix_ts_ms = (timestamp_ms + 1) & 0xffff_ffff_ffff + equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms) + # counter overflowed, so we picked a new one + equal(self.uuid._last_counter_v7, new_counter) + equal((u.int >> 64) & 0xfff, new_counter_hi) + equal((u.int >> 32) & 0x3fff_ffff, new_counter_lo) + equal(u.int & 0xffff_ffff, tail) + def test_uuid8(self): equal = self.assertEqual u = self.uuid.uuid8() diff --git a/Lib/uuid.py b/Lib/uuid.py index ed69b4de07b53f..7a12b48cb008f1 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -1,8 +1,12 @@ r"""UUID objects (universally unique identifiers) according to RFC 4122/9562. -This module provides immutable UUID objects (class UUID) and the functions -uuid1(), uuid3(), uuid4(), uuid5(), uuid6(), and uuid8() for generating -version 1, 3, 4, 5, 6, and 8 UUIDs as specified in RFC 4122/9562. +This module provides immutable UUID objects (class UUID) and functions for +generating UUIDs corresponding to a specific UUID version as specified in +RFC 4122/9562, e.g., uuid1() for UUID version 1, uuid3() for UUID version 3, +and so on. + +Note that UUID version 2 is deliberately omitted as it is outside the scope +of the RFC. If all you want is a unique ID, you should probably call uuid1() or uuid4(). Note that uuid1() may compromise privacy since it creates a UUID containing @@ -54,6 +58,7 @@ import os import sys +import time from enum import Enum, _simple_enum @@ -102,6 +107,7 @@ class SafeUUID: _RFC_4122_VERSION_4_FLAGS = ((4 << 76) | (0x8000 << 48)) _RFC_4122_VERSION_5_FLAGS = ((5 << 76) | (0x8000 << 48)) _RFC_4122_VERSION_6_FLAGS = ((6 << 76) | (0x8000 << 48)) +_RFC_4122_VERSION_7_FLAGS = ((7 << 76) | (0x8000 << 48)) _RFC_4122_VERSION_8_FLAGS = ((8 << 76) | (0x8000 << 48)) @@ -720,7 +726,6 @@ def uuid1(node=None, clock_seq=None): return UUID(bytes=uuid_time, is_safe=is_safe) global _last_timestamp - import time nanoseconds = time.time_ns() # 0x01b21dd213814000 is the number of 100-ns intervals between the # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00. @@ -770,6 +775,7 @@ def uuid5(namespace, name): int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS return UUID._from_int(int_uuid_5) + _last_timestamp_v6 = None def uuid6(node=None, clock_seq=None): @@ -808,6 +814,83 @@ def uuid6(node=None, clock_seq=None): int_uuid_6 |= _RFC_4122_VERSION_6_FLAGS return UUID._from_int(int_uuid_6) + +_last_timestamp_v7 = None +_last_counter_v7 = 0 # 42-bit counter + +def _uuid7_get_counter_and_tail(): + rand = int.from_bytes(os.urandom(10)) + # 42-bit counter with MSB set to 0 + counter = (rand >> 32) & 0x1ff_ffff_ffff + # 32-bit random data + tail = rand & 0xffff_ffff + return counter, tail + + +def uuid7(): + """Generate a UUID from a Unix timestamp in milliseconds and random bits. + + UUIDv7 objects feature monotonicity within a millisecond. + """ + # --- 48 --- -- 4 -- --- 12 --- -- 2 -- --- 30 --- - 32 - + # unix_ts_ms | version | counter_hi | variant | counter_lo | random + # + # 'counter = counter_hi | counter_lo' is a 42-bit counter constructed + # with Method 1 of RFC 9562, §6.2, and its MSB is set to 0. + # + # 'random' is a 32-bit random value regenerated for every new UUID. + # + # If multiple UUIDs are generated within the same millisecond, the LSB + # of 'counter' is incremented by 1. When overflowing, the timestamp is + # advanced and the counter is reset to a random 42-bit integer with MSB + # set to 0. + + global _last_timestamp_v7 + global _last_counter_v7 + + nanoseconds = time.time_ns() + timestamp_ms = nanoseconds // 1_000_000 + + if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7: + counter, tail = _uuid7_get_counter_and_tail() + else: + if timestamp_ms < _last_timestamp_v7: + timestamp_ms = _last_timestamp_v7 + 1 + # advance the 42-bit counter + counter = _last_counter_v7 + 1 + if counter > 0x3ff_ffff_ffff: + # advance the 48-bit timestamp + timestamp_ms += 1 + counter, tail = _uuid7_get_counter_and_tail() + else: + # 32-bit random data + tail = int.from_bytes(os.urandom(4)) + + unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff + counter_msbs = counter >> 30 + # keep 12 counter's MSBs and clear variant bits + counter_hi = counter_msbs & 0x0fff + # keep 30 counter's LSBs and clear version bits + counter_lo = counter & 0x3fff_ffff + # ensure that the tail is always a 32-bit integer (by construction, + # it is already the case, but future interfaces may allow the user + # to specify the random tail) + tail &= 0xffff_ffff + + int_uuid_7 = unix_ts_ms << 80 + int_uuid_7 |= counter_hi << 64 + int_uuid_7 |= counter_lo << 32 + int_uuid_7 |= tail + # by construction, the variant and version bits are already cleared + int_uuid_7 |= _RFC_4122_VERSION_7_FLAGS + res = UUID._from_int(int_uuid_7) + + # defer global update until all computations are done + _last_timestamp_v7 = timestamp_ms + _last_counter_v7 = counter + return res + + def uuid8(a=None, b=None, c=None): """Generate a UUID from three custom blocks. @@ -833,6 +916,7 @@ def uuid8(a=None, b=None, c=None): int_uuid_8 |= _RFC_4122_VERSION_8_FLAGS return UUID._from_int(int_uuid_8) + def main(): """Run the uuid command line interface.""" uuid_funcs = { @@ -841,6 +925,7 @@ def main(): "uuid4": uuid4, "uuid5": uuid5, "uuid6": uuid6, + "uuid7": uuid7, "uuid8": uuid8, } uuid_namespace_funcs = ("uuid3", "uuid5") diff --git a/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst b/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst new file mode 100644 index 00000000000000..f85e05622623c2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst @@ -0,0 +1,2 @@ +Add :func:`uuid.uuid7` for generating UUIDv7 objects as specified in +:rfc:`9562`. Patch by Bénédikt Tran. _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com