This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new 349eb923 refactor: optimize SnapshotUtil ancestor methods with early
return (#560)
349eb923 is described below
commit 349eb923878703b26b67fcf4ed87202a17b315a7
Author: wzhuo <[email protected]>
AuthorDate: Fri Feb 27 15:29:16 2026 +0800
refactor: optimize SnapshotUtil ancestor methods with early return (#560)
---
src/iceberg/util/snapshot_util.cc | 144 +++++++++++++++++++-----------
src/iceberg/util/snapshot_util_internal.h | 95 ++++++++++++++++----
2 files changed, 171 insertions(+), 68 deletions(-)
diff --git a/src/iceberg/util/snapshot_util.cc
b/src/iceberg/util/snapshot_util.cc
index 84e7a10b..49019408 100644
--- a/src/iceberg/util/snapshot_util.cc
+++ b/src/iceberg/util/snapshot_util.cc
@@ -41,9 +41,13 @@ namespace iceberg {
Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::AncestorsOf(
const Table& table, int64_t snapshot_id) {
- return table.SnapshotById(snapshot_id).and_then([&table](const auto&
snapshot) {
- return AncestorsOf(table, snapshot);
- });
+ return AncestorsOf(*table.metadata(), snapshot_id);
+}
+
+Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::AncestorsOf(
+ const TableMetadata& metadata, int64_t snapshot_id) {
+ return AncestorsOf(snapshot_id,
+ [&metadata](int64_t id) { return
metadata.SnapshotById(id); });
}
Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::AncestorsOf(
@@ -54,72 +58,96 @@ Result<std::vector<std::shared_ptr<Snapshot>>>
SnapshotUtil::AncestorsOf(
});
}
-Result<bool> SnapshotUtil::IsAncestorOf(const Table& table, int64_t
snapshot_id,
+Result<bool> SnapshotUtil::IsAncestorOf(const Table& table,
int64_t ancestor_snapshot_id) {
- ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(table, snapshot_id));
- return std::ranges::any_of(ancestors, [ancestor_snapshot_id](const auto&
snapshot) {
- return snapshot != nullptr && snapshot->snapshot_id ==
ancestor_snapshot_id;
- });
+ return IsAncestorOf(*table.metadata(), ancestor_snapshot_id);
}
-Result<bool> SnapshotUtil::IsAncestorOf(
- int64_t snapshot_id, int64_t ancestor_snapshot_id,
- const std::function<Result<std::shared_ptr<Snapshot>>(int64_t)>& lookup) {
- ICEBERG_ASSIGN_OR_RAISE(auto snapshot, lookup(snapshot_id));
- ICEBERG_CHECK(snapshot != nullptr, "Cannot find snapshot: {}", snapshot_id);
- ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(snapshot, lookup));
- return std::ranges::any_of(ancestors, [ancestor_snapshot_id](const auto&
ancestor) {
- return ancestor != nullptr && ancestor->snapshot_id ==
ancestor_snapshot_id;
- });
+Result<bool> SnapshotUtil::IsAncestorOf(const TableMetadata& metadata,
+ int64_t ancestor_snapshot_id) {
+ ICEBERG_ASSIGN_OR_RAISE(auto current, metadata.Snapshot());
+ ICEBERG_CHECK(current != nullptr, "Current snapshot is null");
+ return IsAncestorOf(metadata, current->snapshot_id, ancestor_snapshot_id);
}
-Result<bool> SnapshotUtil::IsAncestorOf(const Table& table,
+Result<bool> SnapshotUtil::IsAncestorOf(const Table& table, int64_t
snapshot_id,
int64_t ancestor_snapshot_id) {
- ICEBERG_ASSIGN_OR_RAISE(auto current, table.current_snapshot());
- ICEBERG_CHECK(current != nullptr, "Current snapshot is null");
- return IsAncestorOf(table, current->snapshot_id, ancestor_snapshot_id);
+ return IsAncestorOf(*table.metadata(), snapshot_id, ancestor_snapshot_id);
}
Result<bool> SnapshotUtil::IsAncestorOf(const TableMetadata& metadata,
+ int64_t snapshot_id,
int64_t ancestor_snapshot_id) {
- ICEBERG_ASSIGN_OR_RAISE(auto current, metadata.Snapshot());
- ICEBERG_CHECK(current != nullptr, "Current snapshot is null");
+ return IsAncestorOf(snapshot_id, ancestor_snapshot_id,
+ [&metadata](int64_t id) { return
metadata.SnapshotById(id); });
+}
- // Create a lookup function that uses the metadata
- auto lookup = [&metadata](int64_t id) -> Result<std::shared_ptr<Snapshot>> {
- return metadata.SnapshotById(id);
- };
+Result<bool> SnapshotUtil::IsAncestorOf(
+ int64_t snapshot_id, int64_t ancestor_snapshot_id,
+ const std::function<Result<std::shared_ptr<Snapshot>>(int64_t)>& lookup) {
+ ICEBERG_ASSIGN_OR_RAISE(auto current, lookup(snapshot_id));
+ ICEBERG_CHECK(current != nullptr, "Cannot find snapshot: {}", snapshot_id);
- ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(current->snapshot_id,
lookup));
- return std::ranges::any_of(ancestors, [ancestor_snapshot_id](const auto&
snapshot) {
- return snapshot != nullptr && snapshot->snapshot_id ==
ancestor_snapshot_id;
- });
+ if (snapshot_id == ancestor_snapshot_id) {
+ return true;
+ }
+
+ while (current != nullptr && current->parent_snapshot_id.has_value()) {
+ int64_t parent_id = current->parent_snapshot_id.value();
+ auto parent_result = lookup(parent_id);
+ ICEBERG_ACTION_FOR_NOT_FOUND(parent_result, { break; });
+ if (parent_id == ancestor_snapshot_id) {
+ return true;
+ }
+ current = std::move(parent_result.value());
+ }
+
+ return false;
}
Result<bool> SnapshotUtil::IsParentAncestorOf(const Table& table, int64_t
snapshot_id,
int64_t
ancestor_parent_snapshot_id) {
- ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(table, snapshot_id));
- return std::ranges::any_of(
- ancestors, [ancestor_parent_snapshot_id](const auto& snapshot) {
- return snapshot != nullptr && snapshot->parent_snapshot_id.has_value()
&&
- snapshot->parent_snapshot_id.value() ==
ancestor_parent_snapshot_id;
- });
+ return IsParentAncestorOf(*table.metadata(), snapshot_id,
ancestor_parent_snapshot_id);
+}
+
+Result<bool> SnapshotUtil::IsParentAncestorOf(const TableMetadata& metadata,
+ int64_t snapshot_id,
+ int64_t
ancestor_parent_snapshot_id) {
+ return IsParentAncestorOf(
+ snapshot_id, ancestor_parent_snapshot_id,
+ [&metadata](int64_t id) { return metadata.SnapshotById(id); });
+}
+
+Result<bool> SnapshotUtil::IsParentAncestorOf(
+ int64_t snapshot_id, int64_t ancestor_parent_snapshot_id,
+ const std::function<Result<std::shared_ptr<Snapshot>>(int64_t)>& lookup) {
+ ICEBERG_ASSIGN_OR_RAISE(auto current, lookup(snapshot_id));
+ ICEBERG_CHECK(current != nullptr, "Cannot find snapshot: {}", snapshot_id);
+
+ while (current != nullptr) {
+ if (!current->parent_snapshot_id.has_value()) {
+ break;
+ }
+ if (current->parent_snapshot_id.value() == ancestor_parent_snapshot_id) {
+ return true;
+ }
+ auto parent_result = lookup(current->parent_snapshot_id.value());
+ ICEBERG_ACTION_FOR_NOT_FOUND(parent_result, { break; });
+ current = std::move(parent_result.value());
+ }
+
+ return false;
}
Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::CurrentAncestors(
const Table& table) {
- ICEBERG_ASSIGN_OR_RAISE(auto current, table.current_snapshot());
- return AncestorsOf(table, current);
+ return CurrentAncestors(*table.metadata());
}
Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::CurrentAncestors(
const TableMetadata& metadata) {
ICEBERG_ASSIGN_OR_RAISE(auto current, metadata.Snapshot());
- auto lookup = [&metadata](int64_t id) -> Result<std::shared_ptr<Snapshot>> {
- return metadata.SnapshotById(id);
- };
-
- return AncestorsOf(current, lookup);
+ return AncestorsOf(metadata, current);
}
Result<std::vector<int64_t>> SnapshotUtil::CurrentAncestorIds(const Table&
table) {
@@ -137,7 +165,12 @@ Result<std::optional<std::shared_ptr<Snapshot>>>
SnapshotUtil::OldestAncestor(
Result<std::optional<std::shared_ptr<Snapshot>>>
SnapshotUtil::OldestAncestorOf(
const Table& table, int64_t snapshot_id) {
- ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(table, snapshot_id));
+ return OldestAncestorOf(*table.metadata(), snapshot_id);
+}
+
+Result<std::optional<std::shared_ptr<Snapshot>>>
SnapshotUtil::OldestAncestorOf(
+ const TableMetadata& metadata, int64_t snapshot_id) {
+ ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(metadata, snapshot_id));
if (ancestors.empty()) {
return std::nullopt;
}
@@ -151,7 +184,7 @@ Result<std::optional<std::shared_ptr<Snapshot>>>
SnapshotUtil::OldestAncestorAft
auto current = std::move(current_result.value());
std::optional<std::shared_ptr<Snapshot>> last_snapshot = std::nullopt;
- ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(table, current));
+ ICEBERG_ASSIGN_OR_RAISE(auto ancestors, AncestorsOf(*table.metadata(),
current));
for (const auto& snapshot : ancestors) {
auto snapshot_timestamp_ms = snapshot->timestamp_ms;
if (snapshot_timestamp_ms < timestamp_ms) {
@@ -200,7 +233,13 @@ Result<std::vector<int64_t>>
SnapshotUtil::AncestorIdsBetween(
Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::AncestorsBetween(
const Table& table, int64_t latest_snapshot_id,
std::optional<int64_t> oldest_snapshot_id) {
- ICEBERG_ASSIGN_OR_RAISE(auto start, table.SnapshotById(latest_snapshot_id));
+ return AncestorsBetween(*table.metadata(), latest_snapshot_id,
oldest_snapshot_id);
+}
+
+Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::AncestorsBetween(
+ const TableMetadata& metadata, int64_t latest_snapshot_id,
+ std::optional<int64_t> oldest_snapshot_id) {
+ ICEBERG_ASSIGN_OR_RAISE(auto start,
metadata.SnapshotById(latest_snapshot_id));
if (oldest_snapshot_id.has_value()) {
if (latest_snapshot_id == oldest_snapshot_id.value()) {
@@ -208,21 +247,22 @@ Result<std::vector<std::shared_ptr<Snapshot>>>
SnapshotUtil::AncestorsBetween(
}
return AncestorsOf(start,
- [&table, oldest_snapshot_id =
oldest_snapshot_id.value()](
+ [&metadata, oldest_snapshot_id =
oldest_snapshot_id.value()](
int64_t id) -> Result<std::shared_ptr<Snapshot>> {
if (id == oldest_snapshot_id) {
return nullptr;
}
- return table.SnapshotById(id);
+ return metadata.SnapshotById(id);
});
} else {
- return AncestorsOf(table, start);
+ return AncestorsOf(metadata, start);
}
}
Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::AncestorsOf(
- const Table& table, const std::shared_ptr<Snapshot>& snapshot) {
- return AncestorsOf(snapshot, [&table](int64_t id) { return
table.SnapshotById(id); });
+ const TableMetadata& metadata, const std::shared_ptr<Snapshot>& snapshot) {
+ return AncestorsOf(snapshot,
+ [&metadata](int64_t id) { return
metadata.SnapshotById(id); });
}
Result<std::vector<std::shared_ptr<Snapshot>>> SnapshotUtil::AncestorsOf(
diff --git a/src/iceberg/util/snapshot_util_internal.h
b/src/iceberg/util/snapshot_util_internal.h
index ce5e0278..8a315818 100644
--- a/src/iceberg/util/snapshot_util_internal.h
+++ b/src/iceberg/util/snapshot_util_internal.h
@@ -44,6 +44,14 @@ class ICEBERG_EXPORT SnapshotUtil {
static Result<std::vector<std::shared_ptr<Snapshot>>> AncestorsOf(const
Table& table,
int64_t
snapshot_id);
+ /// \brief Returns a vector of ancestors of the given snapshot.
+ ///
+ /// \param metadata The table metadata
+ /// \param snapshot_id The snapshot ID to start from
+ /// \return A vector of ancestor snapshots
+ static Result<std::vector<std::shared_ptr<Snapshot>>> AncestorsOf(
+ const TableMetadata& metadata, int64_t snapshot_id);
+
/// \brief Returns a vector of ancestors of the given snapshot.
///
/// \param snapshot_id The snapshot ID to start from
@@ -53,6 +61,23 @@ class ICEBERG_EXPORT SnapshotUtil {
int64_t snapshot_id,
const std::function<Result<std::shared_ptr<Snapshot>>(int64_t)>& lookup);
+ /// \brief Returns whether ancestor_snapshot_id is an ancestor of the
table's current
+ /// state.
+ ///
+ /// \param table The table to check
+ /// \param ancestor_snapshot_id The ancestor snapshot ID to check for
+ /// \return true if ancestor_snapshot_id is an ancestor of the current
snapshot
+ static Result<bool> IsAncestorOf(const Table& table, int64_t
ancestor_snapshot_id);
+
+ /// \brief Returns whether ancestor_snapshot_id is an ancestor of the
metadata's current
+ /// state.
+ ///
+ /// \param metadata The table metadata to check
+ /// \param ancestor_snapshot_id The ancestor snapshot ID to check for
+ /// \return true if ancestor_snapshot_id is an ancestor of the current
snapshot
+ static Result<bool> IsAncestorOf(const TableMetadata& metadata,
+ int64_t ancestor_snapshot_id);
+
/// \brief Returns whether ancestor_snapshot_id is an ancestor of
snapshot_id.
///
/// \param table The table to check
@@ -62,6 +87,15 @@ class ICEBERG_EXPORT SnapshotUtil {
static Result<bool> IsAncestorOf(const Table& table, int64_t snapshot_id,
int64_t ancestor_snapshot_id);
+ /// \brief Returns whether ancestor_snapshot_id is an ancestor of
snapshot_id.
+ ///
+ /// \param metadata The table metadata to check
+ /// \param snapshot_id The snapshot ID to check
+ /// \param ancestor_snapshot_id The ancestor snapshot ID to check for
+ /// \return true if ancestor_snapshot_id is an ancestor of snapshot_id
+ static Result<bool> IsAncestorOf(const TableMetadata& metadata, int64_t
snapshot_id,
+ int64_t ancestor_snapshot_id);
+
/// \brief Returns whether ancestor_snapshot_id is an ancestor of
snapshot_id using the
/// given lookup function.
///
@@ -73,32 +107,37 @@ class ICEBERG_EXPORT SnapshotUtil {
int64_t snapshot_id, int64_t ancestor_snapshot_id,
const std::function<Result<std::shared_ptr<Snapshot>>(int64_t)>& lookup);
- /// \brief Returns whether ancestor_snapshot_id is an ancestor of the
table's current
- /// state.
+ /// \brief Returns whether some ancestor of snapshot_id has parentId matches
+ /// ancestor_parent_snapshot_id.
///
/// \param table The table to check
- /// \param ancestor_snapshot_id The ancestor snapshot ID to check for
- /// \return true if ancestor_snapshot_id is an ancestor of the current
snapshot
- static Result<bool> IsAncestorOf(const Table& table, int64_t
ancestor_snapshot_id);
+ /// \param snapshot_id The snapshot ID to check
+ /// \param ancestor_parent_snapshot_id The ancestor parent snapshot ID to
check for
+ /// \return true if any ancestor has the given parent ID
+ static Result<bool> IsParentAncestorOf(const Table& table, int64_t
snapshot_id,
+ int64_t ancestor_parent_snapshot_id);
- /// \brief Returns whether ancestor_snapshot_id is an ancestor of the
metadata's current
- /// state.
+ /// \brief Returns whether some ancestor of snapshot_id has parentId matches
+ /// ancestor_parent_snapshot_id.
///
/// \param metadata The table metadata to check
- /// \param ancestor_snapshot_id The ancestor snapshot ID to check for
- /// \return true if ancestor_snapshot_id is an ancestor of the current
snapshot
- static Result<bool> IsAncestorOf(const TableMetadata& metadata,
- int64_t ancestor_snapshot_id);
+ /// \param snapshot_id The snapshot ID to check
+ /// \param ancestor_parent_snapshot_id The ancestor parent snapshot ID to
check for
+ /// \return true if any ancestor has the given parent ID
+ static Result<bool> IsParentAncestorOf(const TableMetadata& metadata,
+ int64_t snapshot_id,
+ int64_t ancestor_parent_snapshot_id);
/// \brief Returns whether some ancestor of snapshot_id has parentId matches
/// ancestor_parent_snapshot_id.
///
- /// \param table The table to check
/// \param snapshot_id The snapshot ID to check
/// \param ancestor_parent_snapshot_id The ancestor parent snapshot ID to
check for
+ /// \param lookup Function to lookup snapshots by ID
/// \return true if any ancestor has the given parent ID
- static Result<bool> IsParentAncestorOf(const Table& table, int64_t
snapshot_id,
- int64_t ancestor_parent_snapshot_id);
+ static Result<bool> IsParentAncestorOf(
+ int64_t snapshot_id, int64_t ancestor_parent_snapshot_id,
+ const std::function<Result<std::shared_ptr<Snapshot>>(int64_t)>& lookup);
/// \brief Returns a vector that traverses the table's snapshots from the
current to the
/// last known ancestor.
@@ -147,6 +186,19 @@ class ICEBERG_EXPORT SnapshotUtil {
static Result<std::optional<std::shared_ptr<Snapshot>>> OldestAncestorOf(
const Table& table, int64_t snapshot_id);
+ /// \brief Traverses the history and finds the oldest ancestor of the
specified
+ /// snapshot.
+ ///
+ /// Oldest ancestor is defined as the ancestor snapshot whose parent is null
or has been
+ /// expired. If the specified snapshot has no parent or parent has been
expired, the
+ /// specified snapshot itself is returned.
+ ///
+ /// \param metadata The table metadata
+ /// \param snapshot_id The ID of the snapshot to find the oldest ancestor
+ /// \return The oldest snapshot, or nullopt if not found
+ static Result<std::optional<std::shared_ptr<Snapshot>>> OldestAncestorOf(
+ const TableMetadata& metadata, int64_t snapshot_id);
+
/// \brief Traverses the history of the table's current snapshot, finds the
oldest
/// snapshot that was committed either at or after a given time.
///
@@ -192,6 +244,17 @@ class ICEBERG_EXPORT SnapshotUtil {
const Table& table, int64_t latest_snapshot_id,
std::optional<int64_t> oldest_snapshot_id);
+ /// \brief Returns a vector of ancestors between two snapshots.
+ ///
+ /// \param metadata The table metadata
+ /// \param latest_snapshot_id The latest snapshot ID
+ /// \param oldest_snapshot_id The oldest snapshot ID (optional, nullopt
means all
+ /// ancestors)
+ /// \return A vector of ancestor snapshots between the two snapshots
+ static Result<std::vector<std::shared_ptr<Snapshot>>> AncestorsBetween(
+ const TableMetadata& metadata, int64_t latest_snapshot_id,
+ std::optional<int64_t> oldest_snapshot_id);
+
/// \brief Traverses the history of the table's current snapshot and finds
the snapshot
/// with the given snapshot id as its parent.
///
@@ -317,11 +380,11 @@ class ICEBERG_EXPORT SnapshotUtil {
private:
/// \brief Helper function to traverse ancestors of a snapshot.
///
- /// \param table The table
+ /// \param metadata The table metadata
/// \param snapshot The snapshot to start from
/// \return A vector of ancestor snapshots
static Result<std::vector<std::shared_ptr<Snapshot>>> AncestorsOf(
- const Table& table, const std::shared_ptr<Snapshot>& snapshot);
+ const TableMetadata& metadata, const std::shared_ptr<Snapshot>&
snapshot);
/// \brief Helper function to traverse ancestors of a snapshot using a
lookup function.
///