smaheshwar-pltr commented on code in PR #3220:
URL: https://github.com/apache/iceberg-python/pull/3220#discussion_r3270889404
##########
tests/catalog/test_catalog_behaviors.py:
##########
@@ -388,6 +389,380 @@ def test_load_table_from_self_identifier(
assert table.metadata == loaded_table.metadata
# Baseline two-column schema used by the helpers below: an optional long `id`
# and an optional string `data`.
_SIMPLE_SCHEMA = Schema(
    NestedField(
        field_id=1,
        name="id",
        field_type=LongType(),
        required=False,
    ),
    NestedField(
        field_id=2,
        name="data",
        field_type=StringType(),
        required=False,
    ),
)
+
+
def _create_simple_table(
    catalog: Catalog,
    identifier: Identifier,
    *,
    schema: Schema = _SIMPLE_SCHEMA,
    format_version: int = 2,
    partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
    properties: dict[str, str] | None = None,
) -> tuple[Identifier, Schema]:
    """Create a table (and its namespace, if missing) for a test.

    Args:
        catalog: Catalog in which the namespace and table are created.
        identifier: Identifier of the table to create; its namespace is
            derived with ``Catalog.namespace_from``.
        schema: Schema passed to ``create_table``.
        format_version: Written into the ``"format-version"`` table property.
        partition_spec: Partition spec passed to ``create_table``.
        properties: Extra table properties. They are applied after
            ``"format-version"``, so a matching key here takes precedence.

    Returns:
        The ``(identifier, schema)`` pair that was used to create the table.
    """
    catalog.create_namespace_if_not_exists(Catalog.namespace_from(identifier))

    # Start from the format version, then layer caller-supplied properties on top.
    table_properties = {"format-version": str(format_version)}
    if properties:
        table_properties.update(properties)

    catalog.create_table(
        identifier,
        schema=schema,
        partition_spec=partition_spec,
        properties=table_properties,
    )
    return identifier, schema
+
+
def _simple_data(num_rows: int = 2) -> pa.Table:
    """Build a small Arrow table with ``id`` and ``data`` columns.

    ``id`` holds ``0 .. num_rows - 1`` as int64; ``data`` holds the character
    whose code point is ``ord("a") + id`` for each row, typed as large_string.
    """
    ids = list(range(num_rows))
    letters = [chr(ord("a") + offset) for offset in ids]
    arrow_schema = pa.schema(
        [
            pa.field("id", pa.int64()),
            pa.field("data", pa.large_string()),
        ]
    )
    return pa.Table.from_pydict({"id": ids, "data": letters}, schema=arrow_schema)
+
+
# Schema handed to `replace_table_transaction` in the replace tests: the same
# optional `id` / `data` columns (field ids 1 and 2) plus an optional boolean
# `extra` column with field id 3.
_REPLACE_SCHEMA = Schema(
    NestedField(
        field_id=1,
        name="id",
        field_type=LongType(),
        required=False,
    ),
    NestedField(
        field_id=2,
        name="data",
        field_type=StringType(),
        required=False,
    ),
    NestedField(
        field_id=3,
        name="extra",
        field_type=BooleanType(),
        required=False,
    ),
)
+
+
def test_replace_transaction(catalog: Catalog, test_table_identifier: Identifier) -> None:
    """Replacing a table swaps the current schema but keeps UUID and snapshot history."""
    # Arrange: a table holding exactly one appended snapshot.
    _create_simple_table(catalog, test_table_identifier)
    catalog.load_table(test_table_identifier).append(_simple_data())
    before = catalog.load_table(test_table_identifier)
    prior_snapshot_id = before.current_snapshot().snapshot_id  # type: ignore[union-attr]
    prior_log = list(before.metadata.snapshot_log)
    assert len(prior_log) == 1

    # Act: replace the table with the three-column schema and reload it.
    replace_txn = catalog.replace_table_transaction(test_table_identifier, schema=_REPLACE_SCHEMA)
    replace_txn.commit_transaction()
    after = catalog.load_table(test_table_identifier)
    after_meta = after.metadata

    # The table keeps its UUID, has no current snapshot, and exposes the new columns.
    assert after_meta.table_uuid == before.metadata.table_uuid
    assert after_meta.current_snapshot_id is None
    assert {field.name for field in after.schema().fields} == {"id", "data", "extra"}

    # The pre-replace snapshot is retained by id (not merely by count), and the
    # earlier snapshot-log entries still lead the log in their original order.
    assert prior_snapshot_id in [snap.snapshot_id for snap in after_meta.snapshots]
    assert after_meta.snapshot_log[: len(prior_log)] == prior_log

    # Both schemas are listed; the new one (id 1) is current.
    assert sorted(schema.schema_id for schema in after_meta.schemas) == [0, 1]
    assert after_meta.current_schema_id == 1

    # Time-travelling to the pre-replace snapshot yields the rows written before the replace.
    rows_at_prior_snapshot = after.scan(snapshot_id=prior_snapshot_id).to_arrow()
    assert rows_at_prior_snapshot.equals(_simple_data())
+
+
@dataclass
class _ReplaceFixture:
    """Result bundle returned by `_run_complete_replace`.

    Carries the table as it was before and after the replace, together with
    the inputs the tests need in order to assert on the outcome.
    """

    # Table state before the replace.
    original: Table
    # Table state after the replace.
    replaced: Table
    # NOTE(review): presumably the sort order supplied to the replace; confirm in `_run_complete_replace`.
    new_sort: SortOrder
    # NOTE(review): presumably the rows written before the replace; confirm in `_run_complete_replace`.
    original_data: pa.Table
    # NOTE(review): presumably the id of the snapshot that was current before the replace; confirm.
    old_snapshot_id: int
+
+
+def _run_complete_replace(catalog: Catalog, identifier: Identifier, tmp_path:
Path) -> _ReplaceFixture:
Review Comment:
[AI Reviewer Aid] The three `test_complete_replace_transaction_*` tests
below share this setup. Together they mirror Java's
[`testCompleteReplaceTransaction`](https://github.com/apache/iceberg/blob/2f6606a247e2b16be46ca6c02fc4cfc2e17691e6/core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java#L2657-L2731):
they exercise all six `replace_table_transaction` args (identifier + schema +
spec + sort + location + properties) with an RTAS append, and assert that
history accumulates, that the new snapshot has no parent, and that the
property-merge semantics hold (keep / override / add — which Java covers under
`testReplaceTransactionProperties*` in the same file).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]