This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new a29491af Remove recursive call from `ancestors_of` (#821)
a29491af is described below

commit a29491af52dc4aff46a325bbaac4a11c2f2bfabc
Author: Andre Luis Anastacio <[email protected]>
AuthorDate: Tue Jun 18 13:35:32 2024 -0300

    Remove recursive call from `ancestors_of` (#821)
---
 pyiceberg/table/snapshots.py | 11 +++---
 tests/conftest.py            | 86 +++++++++++++++++++++++++++++++++++++++++++-
 tests/table/test_init.py     | 15 ++++++++
 3 files changed, 106 insertions(+), 6 deletions(-)

diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py
index b21a0f56..d6a3ff16 100644
--- a/pyiceberg/table/snapshots.py
+++ b/pyiceberg/table/snapshots.py
@@ -421,8 +421,9 @@ def set_when_positive(properties: Dict[str, str], num: int, 
property_name: str)
 
 def ancestors_of(current_snapshot: Optional[Snapshot], table_metadata: 
TableMetadata) -> Iterable[Snapshot]:
     """Get the ancestors of and including the given snapshot."""
-    if current_snapshot:
-        yield current_snapshot
-        if current_snapshot.parent_snapshot_id is not None:
-            if parent := 
table_metadata.snapshot_by_id(current_snapshot.parent_snapshot_id):
-                yield from ancestors_of(parent, table_metadata)
+    snapshot = current_snapshot
+    while snapshot is not None:
+        yield snapshot
+        if snapshot.parent_snapshot_id is None:
+            break
+        snapshot = table_metadata.snapshot_by_id(snapshot.parent_snapshot_id)
diff --git a/tests/conftest.py b/tests/conftest.py
index a160322e..2092d93d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -29,10 +29,11 @@ import os
 import re
 import socket
 import string
+import time
 import uuid
 from datetime import date, datetime, timezone
 from pathlib import Path
-from random import choice
+from random import choice, randint
 from tempfile import TemporaryDirectory
 from typing import (
     TYPE_CHECKING,
@@ -731,6 +732,77 @@ def example_table_metadata_no_snapshot_v1() -> Dict[str, 
Any]:
     return EXAMPLE_TABLE_METADATA_NO_SNAPSHOT_V1
 
 
[email protected]
+def example_table_metadata_v2_with_extensive_snapshots() -> Dict[str, Any]:
+    def generate_snapshot(
+        snapshot_id: int,
+        parent_snapshot_id: Optional[int] = None,
+        timestamp_ms: Optional[int] = None,
+        sequence_number: int = 0,
+    ) -> Dict[str, Any]:
+        return {
+            "snapshot-id": snapshot_id,
+            "parent-snapshot-id": parent_snapshot_id,
+            "timestamp-ms": timestamp_ms or int(time.time() * 1000),
+            "sequence-number": sequence_number,
+            "summary": {"operation": "append"},
+            "manifest-list": f"s3://a/b/{snapshot_id}.avro",
+        }
+
+    snapshots = []
+    snapshot_log = []
+    initial_snapshot_id = 3051729675574597004
+
+    for i in range(2000):
+        snapshot_id = initial_snapshot_id + i
+        parent_snapshot_id = snapshot_id - 1 if i > 0 else None
+        timestamp_ms = int(time.time() * 1000) - randint(0, 1000000)
+        snapshots.append(generate_snapshot(snapshot_id, parent_snapshot_id, 
timestamp_ms, i))
+        snapshot_log.append({"snapshot-id": snapshot_id, "timestamp-ms": 
timestamp_ms})
+
+    return {
+        "format-version": 2,
+        "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+        "location": "s3://bucket/test/location",
+        "last-sequence-number": 34,
+        "last-updated-ms": 1602638573590,
+        "last-column-id": 3,
+        "current-schema-id": 1,
+        "schemas": [
+            {"type": "struct", "schema-id": 0, "fields": [{"id": 1, "name": 
"x", "required": True, "type": "long"}]},
+            {
+                "type": "struct",
+                "schema-id": 1,
+                "identifier-field-ids": [1, 2],
+                "fields": [
+                    {"id": 1, "name": "x", "required": True, "type": "long"},
+                    {"id": 2, "name": "y", "required": True, "type": "long", 
"doc": "comment"},
+                    {"id": 3, "name": "z", "required": True, "type": "long"},
+                ],
+            },
+        ],
+        "default-spec-id": 0,
+        "partition-specs": [{"spec-id": 0, "fields": [{"name": "x", 
"transform": "identity", "source-id": 1, "field-id": 1000}]}],
+        "last-partition-id": 1000,
+        "default-sort-order-id": 3,
+        "sort-orders": [
+            {
+                "order-id": 3,
+                "fields": [
+                    {"transform": "identity", "source-id": 2, "direction": 
"asc", "null-order": "nulls-first"},
+                    {"transform": "bucket[4]", "source-id": 3, "direction": 
"desc", "null-order": "nulls-last"},
+                ],
+            }
+        ],
+        "properties": {"read.split.target.size": "134217728"},
+        "current-snapshot-id": initial_snapshot_id + 1999,
+        "snapshots": snapshots,
+        "snapshot-log": snapshot_log,
+        "metadata-log": [{"metadata-file": "s3://bucket/.../v1.json", 
"timestamp-ms": 1515100}],
+        "refs": {"test": {"snapshot-id": initial_snapshot_id, "type": "tag", 
"max-ref-age-ms": 10000000}},
+    }
+
+
 EXAMPLE_TABLE_METADATA_V2 = {
     "format-version": 2,
     "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
@@ -1992,6 +2064,18 @@ def table_v2(example_table_metadata_v2: Dict[str, Any]) 
-> Table:
     )
 
 
[email protected]
+def 
table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_snapshots:
 Dict[str, Any]) -> Table:
+    table_metadata = 
TableMetadataV2(**example_table_metadata_v2_with_extensive_snapshots)
+    return Table(
+        identifier=("database", "table"),
+        metadata=table_metadata,
+        metadata_location=f"{table_metadata.location}/uuid.metadata.json",
+        io=load_file_io(),
+        catalog=NoopCatalog("NoopCatalog"),
+    )
+
+
 @pytest.fixture
 def bound_reference_str() -> BoundReference[str]:
     return BoundReference(field=NestedField(1, "field", StringType(), 
required=False), accessor=Accessor(position=0, inner=None))
diff --git a/tests/table/test_init.py b/tests/table/test_init.py
index c97b3a4a..6f8260fa 100644
--- a/tests/table/test_init.py
+++ b/tests/table/test_init.py
@@ -241,6 +241,21 @@ def test_ancestors_of(table_v2: Table) -> None:
     ]
 
 
+def test_ancestors_of_recursive_error(table_v2_with_extensive_snapshots: 
Table) -> None:
+    # Test RecursionError: maximum recursion depth exceeded
+    assert (
+        len(
+            list(
+                ancestors_of(
+                    table_v2_with_extensive_snapshots.current_snapshot(),
+                    table_v2_with_extensive_snapshots.metadata,
+                )
+            )
+        )
+        == 2000
+    )
+
+
 def test_snapshot_by_id_does_not_exist(table_v2: Table) -> None:
     assert table_v2.snapshot_by_id(-1) is None
 

Reply via email to