This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 6a77195  Add logic to generate a new snapshot-id (#37)
6a77195 is described below

commit 6a7719550ba8b8d40911a401896da899619afd47
Author: Fokko Driesprong <[email protected]>
AuthorDate: Thu Oct 5 10:28:56 2023 +0200

    Add logic to generate a new snapshot-id (#37)
    
    * Add logic to generate a new snapshot-id
    
    * Use the xor approach
    
    * Closer to the Java way of doing it
---
 pyiceberg/table/__init__.py | 23 +++++++++++++++++++++++
 tests/table/test_init.py    |  6 ++++++
 2 files changed, 29 insertions(+)

diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index 8443315..6e71a40 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -17,6 +17,7 @@
 from __future__ import annotations
 
 import itertools
+import uuid
 from abc import ABC, abstractmethod
 from copy import copy
 from dataclasses import dataclass
@@ -498,6 +499,14 @@ class Table:
         """Return the table's base location."""
         return self.metadata.location
 
+    def new_snapshot_id(self) -> int:
+        """Return a newly generated snapshot-id that is not already in use."""
+        while True:
+            candidate = _generate_snapshot_id()
+            # Retry until snapshot_by_id finds no snapshot with this id.
+            if self.snapshot_by_id(candidate) is None:
+                return candidate
+
     def current_snapshot(self) -> Optional[Snapshot]:
         """Get the current snapshot for this table, or None if there is no current snapshot."""
         if snapshot_id := self.metadata.current_snapshot_id:
@@ -1566,3 +1575,17 @@ def _add_and_move_fields(
     elif len(moves) > 0:
         return _move_fields(fields, moves)
     return None if len(adds) == 0 else tuple(*fields, *adds)
+
+
+def _generate_snapshot_id() -> int:
+    """Generate a new snapshot ID by XOR-folding a random UUID into 64 bits.
+
+    Returns: A non-negative integer that fits in a signed 64-bit long.
+    """
+    raw = uuid.uuid4().bytes
+    folded = bytes(lhs ^ rhs for lhs, rhs in zip(raw[0:8], raw[8:16]))
+    # Keep only the low 63 bits so the result is always a non-negative signed 64-bit value.
+    # (Negating a negative value instead would turn -2**63 into 2**63, which overflows.)
+    snapshot_id = int.from_bytes(folded, byteorder='little') & 0x7FFFFFFFFFFFFFFF
+
+    return snapshot_id
diff --git a/tests/table/test_init.py b/tests/table/test_init.py
index 8fd5e2b..369df4f 100644
--- a/tests/table/test_init.py
+++ b/tests/table/test_init.py
@@ -41,6 +41,7 @@ from pyiceberg.table import (
     StaticTable,
     Table,
     UpdateSchema,
+    _generate_snapshot_id,
     _match_deletes_to_datafile,
 )
 from pyiceberg.table.metadata import INITIAL_SEQUENCE_NUMBER
@@ -506,3 +507,8 @@ def test_add_nested_list_type_column(table: Table) -> None:
         element_required=False,
     )
     assert new_schema.highest_field_id == 7
+
+
+def test_generate_snapshot_id(table: Table) -> None:
+    # Both the bare generator and the table-level wrapper must hand back an int.
+    assert all(isinstance(make(), int) for make in (_generate_snapshot_id, table.new_snapshot_id))

Reply via email to