This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new a29491af Remove recursive call from `ancestors_of` (#821)
a29491af is described below
commit a29491af52dc4aff46a325bbaac4a11c2f2bfabc
Author: Andre Luis Anastacio <[email protected]>
AuthorDate: Tue Jun 18 13:35:32 2024 -0300
Remove recursive call from `ancestors_of` (#821)
---
pyiceberg/table/snapshots.py | 11 +++---
tests/conftest.py | 86 +++++++++++++++++++++++++++++++++++++++++++-
tests/table/test_init.py | 15 ++++++++
3 files changed, 106 insertions(+), 6 deletions(-)
diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py
index b21a0f56..d6a3ff16 100644
--- a/pyiceberg/table/snapshots.py
+++ b/pyiceberg/table/snapshots.py
@@ -421,8 +421,9 @@ def set_when_positive(properties: Dict[str, str], num: int,
property_name: str)
def ancestors_of(current_snapshot: Optional[Snapshot], table_metadata:
TableMetadata) -> Iterable[Snapshot]:
"""Get the ancestors of and including the given snapshot."""
- if current_snapshot:
- yield current_snapshot
- if current_snapshot.parent_snapshot_id is not None:
- if parent :=
table_metadata.snapshot_by_id(current_snapshot.parent_snapshot_id):
- yield from ancestors_of(parent, table_metadata)
+ snapshot = current_snapshot
+ while snapshot is not None:
+ yield snapshot
+ if snapshot.parent_snapshot_id is None:
+ break
+ snapshot = table_metadata.snapshot_by_id(snapshot.parent_snapshot_id)
diff --git a/tests/conftest.py b/tests/conftest.py
index a160322e..2092d93d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -29,10 +29,11 @@ import os
import re
import socket
import string
+import time
import uuid
from datetime import date, datetime, timezone
from pathlib import Path
-from random import choice
+from random import choice, randint
from tempfile import TemporaryDirectory
from typing import (
TYPE_CHECKING,
@@ -731,6 +732,77 @@ def example_table_metadata_no_snapshot_v1() -> Dict[str,
Any]:
return EXAMPLE_TABLE_METADATA_NO_SNAPSHOT_V1
[email protected]
+def example_table_metadata_v2_with_extensive_snapshots() -> Dict[str, Any]:
+ def generate_snapshot(
+ snapshot_id: int,
+ parent_snapshot_id: Optional[int] = None,
+ timestamp_ms: Optional[int] = None,
+ sequence_number: int = 0,
+ ) -> Dict[str, Any]:
+ return {
+ "snapshot-id": snapshot_id,
+ "parent-snapshot-id": parent_snapshot_id,
+ "timestamp-ms": timestamp_ms or int(time.time() * 1000),
+ "sequence-number": sequence_number,
+ "summary": {"operation": "append"},
+ "manifest-list": f"s3://a/b/{snapshot_id}.avro",
+ }
+
+ snapshots = []
+ snapshot_log = []
+ initial_snapshot_id = 3051729675574597004
+
+ for i in range(2000):
+ snapshot_id = initial_snapshot_id + i
+ parent_snapshot_id = snapshot_id - 1 if i > 0 else None
+ timestamp_ms = int(time.time() * 1000) - randint(0, 1000000)
+ snapshots.append(generate_snapshot(snapshot_id, parent_snapshot_id,
timestamp_ms, i))
+ snapshot_log.append({"snapshot-id": snapshot_id, "timestamp-ms":
timestamp_ms})
+
+ return {
+ "format-version": 2,
+ "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+ "location": "s3://bucket/test/location",
+ "last-sequence-number": 34,
+ "last-updated-ms": 1602638573590,
+ "last-column-id": 3,
+ "current-schema-id": 1,
+ "schemas": [
+ {"type": "struct", "schema-id": 0, "fields": [{"id": 1, "name":
"x", "required": True, "type": "long"}]},
+ {
+ "type": "struct",
+ "schema-id": 1,
+ "identifier-field-ids": [1, 2],
+ "fields": [
+ {"id": 1, "name": "x", "required": True, "type": "long"},
+ {"id": 2, "name": "y", "required": True, "type": "long",
"doc": "comment"},
+ {"id": 3, "name": "z", "required": True, "type": "long"},
+ ],
+ },
+ ],
+ "default-spec-id": 0,
+ "partition-specs": [{"spec-id": 0, "fields": [{"name": "x",
"transform": "identity", "source-id": 1, "field-id": 1000}]}],
+ "last-partition-id": 1000,
+ "default-sort-order-id": 3,
+ "sort-orders": [
+ {
+ "order-id": 3,
+ "fields": [
+ {"transform": "identity", "source-id": 2, "direction":
"asc", "null-order": "nulls-first"},
+ {"transform": "bucket[4]", "source-id": 3, "direction":
"desc", "null-order": "nulls-last"},
+ ],
+ }
+ ],
+ "properties": {"read.split.target.size": "134217728"},
+ "current-snapshot-id": initial_snapshot_id + 1999,
+ "snapshots": snapshots,
+ "snapshot-log": snapshot_log,
+ "metadata-log": [{"metadata-file": "s3://bucket/.../v1.json",
"timestamp-ms": 1515100}],
+ "refs": {"test": {"snapshot-id": initial_snapshot_id, "type": "tag",
"max-ref-age-ms": 10000000}},
+ }
+
+
EXAMPLE_TABLE_METADATA_V2 = {
"format-version": 2,
"table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
@@ -1992,6 +2064,18 @@ def table_v2(example_table_metadata_v2: Dict[str, Any])
-> Table:
)
[email protected]
+def
table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_snapshots:
Dict[str, Any]) -> Table:
+ table_metadata =
TableMetadataV2(**example_table_metadata_v2_with_extensive_snapshots)
+ return Table(
+ identifier=("database", "table"),
+ metadata=table_metadata,
+ metadata_location=f"{table_metadata.location}/uuid.metadata.json",
+ io=load_file_io(),
+ catalog=NoopCatalog("NoopCatalog"),
+ )
+
+
@pytest.fixture
def bound_reference_str() -> BoundReference[str]:
return BoundReference(field=NestedField(1, "field", StringType(),
required=False), accessor=Accessor(position=0, inner=None))
diff --git a/tests/table/test_init.py b/tests/table/test_init.py
index c97b3a4a..6f8260fa 100644
--- a/tests/table/test_init.py
+++ b/tests/table/test_init.py
@@ -241,6 +241,21 @@ def test_ancestors_of(table_v2: Table) -> None:
]
+def test_ancestors_of_recursive_error(table_v2_with_extensive_snapshots:
Table) -> None:
+ # Test RecursionError: maximum recursion depth exceeded
+ assert (
+ len(
+ list(
+ ancestors_of(
+ table_v2_with_extensive_snapshots.current_snapshot(),
+ table_v2_with_extensive_snapshots.metadata,
+ )
+ )
+ )
+ == 2000
+ )
+
+
def test_snapshot_by_id_does_not_exist(table_v2: Table) -> None:
assert table_v2.snapshot_by_id(-1) is None