This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new af746c7 refactor: unify lazy init for table (#344)
af746c7 is described below
commit af746c78da7c30f2d9c269710a401c798518eb5f
Author: Gang Wu <[email protected]>
AuthorDate: Thu Nov 27 10:46:49 2025 +0800
refactor: unify lazy init for table (#344)
Introduced a `TableMetadataCache` to support lazy initialization of
mappings for schemas, partition specs, sort orders and snapshots.
---
src/iceberg/table.cc | 45 ++++++++---------------------
src/iceberg/table.h | 24 ++++++----------
src/iceberg/table_metadata.cc | 64 ++++++++++++++++++++++++++++++++++++++++++
src/iceberg/table_metadata.h | 35 +++++++++++++++++++++++
src/iceberg/test/table_test.cc | 12 ++++----
5 files changed, 125 insertions(+), 55 deletions(-)
diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc
index 7c7df7c..9116a5e 100644
--- a/src/iceberg/table.cc
+++ b/src/iceberg/table.cc
@@ -42,7 +42,8 @@ Table::Table(TableIdentifier identifier,
std::shared_ptr<TableMetadata> metadata
metadata_location_(std::move(metadata_location)),
io_(std::move(io)),
catalog_(std::move(catalog)),
- properties_(TableProperties::FromMap(metadata_->properties)) {}
+ properties_(TableProperties::FromMap(metadata_->properties)),
+ metadata_cache_(std::make_unique<TableMetadataCache>(metadata_.get())) {}
const std::string& Table::uuid() const { return metadata_->table_uuid; }
@@ -57,60 +58,36 @@ Status Table::Refresh() {
metadata_location_ = std::move(refreshed_table->metadata_location_);
io_ = std::move(refreshed_table->io_);
properties_ = std::move(refreshed_table->properties_);
-
- schemas_map_.reset();
- partition_spec_map_.reset();
- sort_orders_map_.reset();
+ metadata_cache_ = std::make_unique<TableMetadataCache>(metadata_.get());
}
return {};
}
Result<std::shared_ptr<Schema>> Table::schema() const { return
metadata_->Schema(); }
-const std::shared_ptr<std::unordered_map<int32_t, std::shared_ptr<Schema>>>&
+Result<std::reference_wrapper<const std::unordered_map<int32_t,
std::shared_ptr<Schema>>>>
Table::schemas() const {
- if (!schemas_map_) {
- schemas_map_ =
- std::make_shared<std::unordered_map<int32_t,
std::shared_ptr<Schema>>>();
- for (const auto& schema : metadata_->schemas) {
- if (schema->schema_id()) {
- schemas_map_->emplace(schema->schema_id().value(), schema);
- }
- }
- }
- return schemas_map_;
+ return metadata_cache_->GetSchemasById();
}
Result<std::shared_ptr<PartitionSpec>> Table::spec() const {
return metadata_->PartitionSpec();
}
-const std::shared_ptr<std::unordered_map<int32_t,
std::shared_ptr<PartitionSpec>>>&
+Result<std::reference_wrapper<
+ const std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>>>
Table::specs() const {
- if (!partition_spec_map_) {
- partition_spec_map_ =
- std::make_shared<std::unordered_map<int32_t,
std::shared_ptr<PartitionSpec>>>();
- for (const auto& spec : metadata_->partition_specs) {
- partition_spec_map_->emplace(spec->spec_id(), spec);
- }
- }
- return partition_spec_map_;
+ return metadata_cache_->GetPartitionSpecsById();
}
Result<std::shared_ptr<SortOrder>> Table::sort_order() const {
return metadata_->SortOrder();
}
-const std::shared_ptr<std::unordered_map<int32_t, std::shared_ptr<SortOrder>>>&
+Result<
+ std::reference_wrapper<const std::unordered_map<int32_t,
std::shared_ptr<SortOrder>>>>
Table::sort_orders() const {
- if (!sort_orders_map_) {
- sort_orders_map_ =
- std::make_shared<std::unordered_map<int32_t,
std::shared_ptr<SortOrder>>>();
- for (const auto& order : metadata_->sort_orders) {
- sort_orders_map_->emplace(order->order_id(), order);
- }
- }
- return sort_orders_map_;
+ return metadata_cache_->GetSortOrdersById();
}
const TableProperties& Table::properties() const { return *properties_; }
diff --git a/src/iceberg/table.h b/src/iceberg/table.h
index 672cac7..1ea5ce8 100644
--- a/src/iceberg/table.h
+++ b/src/iceberg/table.h
@@ -19,6 +19,7 @@
#pragma once
+#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
@@ -60,24 +61,24 @@ class ICEBERG_EXPORT Table {
Result<std::shared_ptr<Schema>> schema() const;
/// \brief Return a map of schema for this table
- /// \note This method is **not** thread-safe in the current implementation.
- const std::shared_ptr<std::unordered_map<int32_t, std::shared_ptr<Schema>>>&
schemas()
- const;
+ Result<
+ std::reference_wrapper<const std::unordered_map<int32_t,
std::shared_ptr<Schema>>>>
+ schemas() const;
/// \brief Return the partition spec for this table, return NotFoundError if
not found
Result<std::shared_ptr<PartitionSpec>> spec() const;
/// \brief Return a map of partition specs for this table
- /// \note This method is **not** thread-safe in the current implementation.
- const std::shared_ptr<std::unordered_map<int32_t,
std::shared_ptr<PartitionSpec>>>&
+ Result<std::reference_wrapper<
+ const std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>>>>
specs() const;
/// \brief Return the sort order for this table, return NotFoundError if not
found
Result<std::shared_ptr<SortOrder>> sort_order() const;
/// \brief Return a map of sort order IDs to sort orders for this table
- /// \note This method is **not** thread-safe in the current implementation.
- const std::shared_ptr<std::unordered_map<int32_t,
std::shared_ptr<SortOrder>>>&
+ Result<std::reference_wrapper<
+ const std::unordered_map<int32_t, std::shared_ptr<SortOrder>>>>
sort_orders() const;
/// \brief Return a map of string properties for this table
@@ -124,14 +125,7 @@ class ICEBERG_EXPORT Table {
std::shared_ptr<FileIO> io_;
std::shared_ptr<Catalog> catalog_;
std::unique_ptr<TableProperties> properties_;
-
- // Cache lazy-initialized maps.
- mutable std::shared_ptr<std::unordered_map<int32_t, std::shared_ptr<Schema>>>
- schemas_map_;
- mutable std::shared_ptr<std::unordered_map<int32_t,
std::shared_ptr<PartitionSpec>>>
- partition_spec_map_;
- mutable std::shared_ptr<std::unordered_map<int32_t,
std::shared_ptr<SortOrder>>>
- sort_orders_map_;
+ std::unique_ptr<class TableMetadataCache> metadata_cache_;
};
} // namespace iceberg
diff --git a/src/iceberg/table_metadata.cc b/src/iceberg/table_metadata.cc
index 669e5a1..b8ddc95 100644
--- a/src/iceberg/table_metadata.cc
+++ b/src/iceberg/table_metadata.cc
@@ -162,6 +162,70 @@ bool operator==(const TableMetadata& lhs, const
TableMetadata& rhs) {
lhs.next_row_id == rhs.next_row_id;
}
+// TableMetadataCache implementation
+
+Result<TableMetadataCache::SchemasMapRef> TableMetadataCache::GetSchemasById()
const {
+ return schemas_map_.Get(metadata_);
+}
+
+Result<TableMetadataCache::PartitionSpecsMapRef>
+TableMetadataCache::GetPartitionSpecsById() const {
+ return partition_specs_map_.Get(metadata_);
+}
+
+Result<TableMetadataCache::SortOrdersMapRef>
TableMetadataCache::GetSortOrdersById()
+ const {
+ return sort_orders_map_.Get(metadata_);
+}
+
+Result<TableMetadataCache::SnapshotsMapRef>
TableMetadataCache::GetSnapshotsById() const {
+ return snapshot_map_.Get(metadata_);
+}
+
+Result<TableMetadataCache::SchemasMap> TableMetadataCache::InitSchemasMap(
+ const TableMetadata* metadata) {
+ SchemasMap schemas_map;
+ schemas_map.reserve(metadata->schemas.size());
+ for (const auto& schema : metadata->schemas) {
+ if (schema->schema_id()) {
+ schemas_map.emplace(schema->schema_id().value(), schema);
+ }
+ }
+ return schemas_map;
+}
+
+Result<TableMetadataCache::PartitionSpecsMap>
TableMetadataCache::InitPartitionSpecsMap(
+ const TableMetadata* metadata) {
+ PartitionSpecsMap partition_specs_map;
+ partition_specs_map.reserve(metadata->partition_specs.size());
+ for (const auto& spec : metadata->partition_specs) {
+ partition_specs_map.emplace(spec->spec_id(), spec);
+ }
+ return partition_specs_map;
+}
+
+Result<TableMetadataCache::SortOrdersMap>
TableMetadataCache::InitSortOrdersMap(
+ const TableMetadata* metadata) {
+ SortOrdersMap sort_orders_map;
+ sort_orders_map.reserve(metadata->sort_orders.size());
+ for (const auto& order : metadata->sort_orders) {
+ sort_orders_map.emplace(order->order_id(), order);
+ }
+ return sort_orders_map;
+}
+
+Result<TableMetadataCache::SnapshotsMap> TableMetadataCache::InitSnapshotMap(
+ const TableMetadata* metadata) {
+ SnapshotsMap snapshots_map;
+ snapshots_map.reserve(metadata->snapshots.size());
+ for (const auto& snapshot : metadata->snapshots) {
+ snapshots_map.emplace(snapshot->snapshot_id, snapshot);
+ }
+ return snapshots_map;
+}
+
+// TableMetadataUtil implementation
+
Result<MetadataFileCodecType> TableMetadataUtil::CodecFromFileName(
std::string_view file_name) {
if (file_name.find(".metadata.json") == std::string::npos) {
diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h
index 2a998c7..5ecbe47 100644
--- a/src/iceberg/table_metadata.h
+++ b/src/iceberg/table_metadata.h
@@ -30,6 +30,7 @@
#include "iceberg/iceberg_export.h"
#include "iceberg/type_fwd.h"
+#include "iceberg/util/lazy.h"
#include "iceberg/util/timepoint.h"
namespace iceberg {
@@ -139,6 +140,40 @@ struct ICEBERG_EXPORT TableMetadata {
const TableMetadata& rhs);
};
+// Cache for table metadata mappings to facilitate fast lookups.
+class ICEBERG_EXPORT TableMetadataCache {
+ public:
+ explicit TableMetadataCache(const TableMetadata* metadata) :
metadata_(metadata) {}
+
+ template <typename T>
+ using ByIdMap = std::unordered_map<int32_t, std::shared_ptr<T>>;
+ using SchemasMap = ByIdMap<Schema>;
+ using PartitionSpecsMap = ByIdMap<PartitionSpec>;
+ using SortOrdersMap = ByIdMap<SortOrder>;
+ using SnapshotsMap = std::unordered_map<int64_t, std::shared_ptr<Snapshot>>;
+ using SchemasMapRef = std::reference_wrapper<const SchemasMap>;
+ using PartitionSpecsMapRef = std::reference_wrapper<const PartitionSpecsMap>;
+ using SortOrdersMapRef = std::reference_wrapper<const SortOrdersMap>;
+ using SnapshotsMapRef = std::reference_wrapper<const SnapshotsMap>;
+
+ Result<SchemasMapRef> GetSchemasById() const;
+ Result<PartitionSpecsMapRef> GetPartitionSpecsById() const;
+ Result<SortOrdersMapRef> GetSortOrdersById() const;
+ Result<SnapshotsMapRef> GetSnapshotsById() const;
+
+ private:
+ static Result<SchemasMap> InitSchemasMap(const TableMetadata* metadata);
+ static Result<PartitionSpecsMap> InitPartitionSpecsMap(const TableMetadata*
metadata);
+ static Result<SortOrdersMap> InitSortOrdersMap(const TableMetadata*
metadata);
+ static Result<SnapshotsMap> InitSnapshotMap(const TableMetadata* metadata);
+
+ const TableMetadata* metadata_;
+ Lazy<InitSchemasMap> schemas_map_;
+ Lazy<InitPartitionSpecsMap> partition_specs_map_;
+ Lazy<InitSortOrdersMap> sort_orders_map_;
+ Lazy<InitSnapshotMap> snapshot_map_;
+};
+
/// \brief Returns a string representation of a SnapshotLogEntry
ICEBERG_EXPORT std::string ToString(const SnapshotLogEntry& entry);
diff --git a/src/iceberg/test/table_test.cc b/src/iceberg/test/table_test.cc
index 6804910..362add9 100644
--- a/src/iceberg/test/table_test.cc
+++ b/src/iceberg/test/table_test.cc
@@ -49,19 +49,19 @@ TEST(Table, TableV1) {
ASSERT_TRUE(schema.has_value());
ASSERT_EQ(schema.value()->fields().size(), 3);
auto schemas = table.schemas();
- ASSERT_TRUE(schemas->empty());
+ ASSERT_TRUE(schemas->get().empty());
// Check table spec
auto spec = table.spec();
ASSERT_TRUE(spec.has_value());
auto specs = table.specs();
- ASSERT_EQ(1UL, specs->size());
+ ASSERT_EQ(1UL, specs->get().size());
// Check table sort_order
auto sort_order = table.sort_order();
ASSERT_TRUE(sort_order.has_value());
auto sort_orders = table.sort_orders();
- ASSERT_EQ(1UL, sort_orders->size());
+ ASSERT_EQ(1UL, sort_orders->get().size());
// Check table location
auto location = table.location();
@@ -89,19 +89,19 @@ TEST(Table, TableV2) {
ASSERT_TRUE(schema.has_value());
ASSERT_EQ(schema.value()->fields().size(), 3);
auto schemas = table.schemas();
- ASSERT_FALSE(schemas->empty());
+ ASSERT_FALSE(schemas->get().empty());
// Check partition spec
auto spec = table.spec();
ASSERT_TRUE(spec.has_value());
auto specs = table.specs();
- ASSERT_EQ(1UL, specs->size());
+ ASSERT_EQ(1UL, specs->get().size());
// Check sort order
auto sort_order = table.sort_order();
ASSERT_TRUE(sort_order.has_value());
auto sort_orders = table.sort_orders();
- ASSERT_EQ(1UL, sort_orders->size());
+ ASSERT_EQ(1UL, sort_orders->get().size());
// Check table location
auto location = table.location();