This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 6a77195 Add logic to generate a new snapshot-id (#37)
6a77195 is described below
commit 6a7719550ba8b8d40911a401896da899619afd47
Author: Fokko Driesprong <[email protected]>
AuthorDate: Thu Oct 5 10:28:56 2023 +0200
Add logic to generate a new snapshot-id (#37)
* Add logic to generate a new snapshot-id
* Use the xor approach
* Closer to the Java way of doing it
---
pyiceberg/table/__init__.py | 23 +++++++++++++++++++++++
tests/table/test_init.py | 6 ++++++
2 files changed, 29 insertions(+)
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index 8443315..6e71a40 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -17,6 +17,7 @@
from __future__ import annotations
import itertools
+import uuid
from abc import ABC, abstractmethod
from copy import copy
from dataclasses import dataclass
@@ -498,6 +499,14 @@ class Table:
"""Return the table's base location."""
return self.metadata.location
+ def new_snapshot_id(self) -> int:
+ """Generate a new snapshot-id that's not in use."""
+ snapshot_id = _generate_snapshot_id()
+ while self.snapshot_by_id(snapshot_id) is not None:
+ snapshot_id = _generate_snapshot_id()
+
+ return snapshot_id
+
def current_snapshot(self) -> Optional[Snapshot]:
"""Get the current snapshot for this table, or None if there is no current snapshot."""
if snapshot_id := self.metadata.current_snapshot_id:
@@ -1566,3 +1575,17 @@ def _add_and_move_fields(
elif len(moves) > 0:
return _move_fields(fields, moves)
return None if len(adds) == 0 else tuple(*fields, *adds)
+
+
+def _generate_snapshot_id() -> int:
+ """Generate a new Snapshot ID from a UUID.
+
+ Returns: An 64 bit long
+ """
+ rnd_uuid = uuid.uuid4()
+ snapshot_id = int.from_bytes(
+ bytes(lhs ^ rhs for lhs, rhs in zip(rnd_uuid.bytes[0:8], rnd_uuid.bytes[8:16])), byteorder='little', signed=True
+ )
+ snapshot_id = snapshot_id if snapshot_id >= 0 else snapshot_id * -1
+
+ return snapshot_id
diff --git a/tests/table/test_init.py b/tests/table/test_init.py
index 8fd5e2b..369df4f 100644
--- a/tests/table/test_init.py
+++ b/tests/table/test_init.py
@@ -41,6 +41,7 @@ from pyiceberg.table import (
StaticTable,
Table,
UpdateSchema,
+ _generate_snapshot_id,
_match_deletes_to_datafile,
)
from pyiceberg.table.metadata import INITIAL_SEQUENCE_NUMBER
@@ -506,3 +507,8 @@ def test_add_nested_list_type_column(table: Table) -> None:
element_required=False,
)
assert new_schema.highest_field_id == 7
+
+
+def test_generate_snapshot_id(table: Table) -> None:
+ assert isinstance(_generate_snapshot_id(), int)
+ assert isinstance(table.new_snapshot_id(), int)