This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new a706ded refactor: package once_flag and LazyInitWithCallOnce (#266)
a706ded is described below
commit a706ded62bfef420528de9889e9ab4c2253340e9
Author: Zehua Zou <[email protected]>
AuthorDate: Tue Oct 21 15:44:25 2025 +0800
refactor: package once_flag and LazyInitWithCallOnce (#266)
---
src/iceberg/schema.cc | 65 +++++++++++++++++++++++------------------------
src/iceberg/schema.h | 24 ++++++++----------
src/iceberg/type.cc | 61 +++++++++++++++++++++-----------------------
src/iceberg/type.h | 28 ++++++++-------------
src/iceberg/util/lazy.h | 67 +++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 147 insertions(+), 98 deletions(-)
diff --git a/src/iceberg/schema.cc b/src/iceberg/schema.cc
index 2ab2f7e..819260c 100644
--- a/src/iceberg/schema.cc
+++ b/src/iceberg/schema.cc
@@ -90,56 +90,55 @@ bool Schema::Equals(const Schema& other) const {
Result<std::optional<std::reference_wrapper<const SchemaField>>>
Schema::FindFieldByName(
std::string_view name, bool case_sensitive) const {
if (case_sensitive) {
- ICEBERG_RETURN_UNEXPECTED(
- LazyInitWithCallOnce(name_to_id_flag_, [this]() { return
InitNameToIdMap(); }));
- auto it = name_to_id_.find(name);
- if (it == name_to_id_.end()) return std::nullopt;
+ ICEBERG_ASSIGN_OR_RAISE(auto name_to_id, name_to_id_.Get(*this));
+ auto it = name_to_id.get().find(name);
+ if (it == name_to_id.get().end()) {
+ return std::nullopt;
+ };
return FindFieldById(it->second);
}
- ICEBERG_RETURN_UNEXPECTED(LazyInitWithCallOnce(
- lowercase_name_to_id_flag_, [this]() { return
InitLowerCaseNameToIdMap(); }));
- auto it = lowercase_name_to_id_.find(StringUtils::ToLower(name));
- if (it == lowercase_name_to_id_.end()) return std::nullopt;
+ ICEBERG_ASSIGN_OR_RAISE(auto lowercase_name_to_id,
lowercase_name_to_id_.Get(*this));
+ auto it = lowercase_name_to_id.get().find(StringUtils::ToLower(name));
+ if (it == lowercase_name_to_id.get().end()) {
+ return std::nullopt;
+ }
return FindFieldById(it->second);
}
-Status Schema::InitIdToFieldMap() const {
- if (!id_to_field_.empty()) {
- return {};
- }
- IdToFieldVisitor visitor(id_to_field_);
- ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor));
- return {};
+Result<std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>>
+Schema::InitIdToFieldMap(const Schema& self) {
+ std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>
id_to_field;
+ IdToFieldVisitor visitor(id_to_field);
+ ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(self, &visitor));
+ return id_to_field;
}
-Status Schema::InitNameToIdMap() const {
- if (!name_to_id_.empty()) {
- return {};
- }
- NameToIdVisitor visitor(name_to_id_, /*case_sensitive=*/true);
+Result<std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>>>
+Schema::InitNameToIdMap(const Schema& self) {
+ std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>>
name_to_id;
+ NameToIdVisitor visitor(name_to_id, /*case_sensitive=*/true);
ICEBERG_RETURN_UNEXPECTED(
- VisitTypeInline(*this, &visitor, /*path=*/"", /*short_path=*/""));
+ VisitTypeInline(self, &visitor, /*path=*/"", /*short_path=*/""));
visitor.Finish();
- return {};
+ return name_to_id;
}
-Status Schema::InitLowerCaseNameToIdMap() const {
- if (!lowercase_name_to_id_.empty()) {
- return {};
- }
- NameToIdVisitor visitor(lowercase_name_to_id_, /*case_sensitive=*/false);
+Result<std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>>>
+Schema::InitLowerCaseNameToIdMap(const Schema& self) {
+ std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>>
+ lowercase_name_to_id;
+ NameToIdVisitor visitor(lowercase_name_to_id, /*case_sensitive=*/false);
ICEBERG_RETURN_UNEXPECTED(
- VisitTypeInline(*this, &visitor, /*path=*/"", /*short_path=*/""));
+ VisitTypeInline(self, &visitor, /*path=*/"", /*short_path=*/""));
visitor.Finish();
- return {};
+ return lowercase_name_to_id;
}
Result<std::optional<std::reference_wrapper<const SchemaField>>>
Schema::FindFieldById(
int32_t field_id) const {
- ICEBERG_RETURN_UNEXPECTED(
- LazyInitWithCallOnce(id_to_field_flag_, [this]() { return
InitIdToFieldMap(); }));
- auto it = id_to_field_.find(field_id);
- if (it == id_to_field_.end()) {
+ ICEBERG_ASSIGN_OR_RAISE(auto id_to_field, id_to_field_.Get(*this));
+ auto it = id_to_field.get().find(field_id);
+ if (it == id_to_field.get().end()) {
return std::nullopt;
}
return it->second;
diff --git a/src/iceberg/schema.h b/src/iceberg/schema.h
index 81f9aa3..2b30a7d 100644
--- a/src/iceberg/schema.h
+++ b/src/iceberg/schema.h
@@ -24,7 +24,6 @@
/// and any utility functions. See iceberg/type.h and iceberg/field.h as well.
#include <cstdint>
-#include <mutex>
#include <optional>
#include <string>
#include <unordered_set>
@@ -34,6 +33,7 @@
#include "iceberg/result.h"
#include "iceberg/schema_field.h"
#include "iceberg/type.h"
+#include "iceberg/util/lazy.h"
#include "iceberg/util/string_util.h"
namespace iceberg {
@@ -99,24 +99,20 @@ class ICEBERG_EXPORT Schema : public StructType {
/// \brief Compare two schemas for equality.
bool Equals(const Schema& other) const;
- Status InitIdToFieldMap() const;
- Status InitNameToIdMap() const;
- Status InitLowerCaseNameToIdMap() const;
+ static Result<std::unordered_map<int32_t, std::reference_wrapper<const
SchemaField>>>
+ InitIdToFieldMap(const Schema&);
+ static Result<std::unordered_map<std::string, int32_t, StringHash,
std::equal_to<>>>
+ InitNameToIdMap(const Schema&);
+ static Result<std::unordered_map<std::string, int32_t, StringHash,
std::equal_to<>>>
+ InitLowerCaseNameToIdMap(const Schema&);
const std::optional<int32_t> schema_id_;
/// Mapping from field id to field.
- mutable std::unordered_map<int32_t, std::reference_wrapper<const
SchemaField>>
- id_to_field_;
+ Lazy<InitIdToFieldMap> id_to_field_;
/// Mapping from field name to field id.
- mutable std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>>
- name_to_id_;
+ Lazy<InitNameToIdMap> name_to_id_;
/// Mapping from lowercased field name to field id
- mutable std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>>
- lowercase_name_to_id_;
-
- mutable std::once_flag id_to_field_flag_;
- mutable std::once_flag name_to_id_flag_;
- mutable std::once_flag lowercase_name_to_id_flag_;
+ Lazy<InitLowerCaseNameToIdMap> lowercase_name_to_id_;
};
} // namespace iceberg
diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc
index ddb3285..104ddad 100644
--- a/src/iceberg/type.cc
+++ b/src/iceberg/type.cc
@@ -51,10 +51,9 @@ std::string StructType::ToString() const {
std::span<const SchemaField> StructType::fields() const { return fields_; }
Result<std::optional<NestedType::SchemaFieldConstRef>>
StructType::GetFieldById(
int32_t field_id) const {
- ICEBERG_RETURN_UNEXPECTED(
- LazyInitWithCallOnce(field_by_id_flag_, [this]() { return
InitFieldById(); }));
- auto it = field_by_id_.find(field_id);
- if (it == field_by_id_.end()) return std::nullopt;
+ ICEBERG_ASSIGN_OR_RAISE(auto field_by_id, field_by_id_.Get(*this));
+ auto it = field_by_id.get().find(field_id);
+ if (it == field_by_id.get().end()) return std::nullopt;
return it->second;
}
Result<std::optional<NestedType::SchemaFieldConstRef>>
StructType::GetFieldByIndex(
@@ -67,18 +66,17 @@ Result<std::optional<NestedType::SchemaFieldConstRef>>
StructType::GetFieldByInd
Result<std::optional<NestedType::SchemaFieldConstRef>>
StructType::GetFieldByName(
std::string_view name, bool case_sensitive) const {
if (case_sensitive) {
- ICEBERG_RETURN_UNEXPECTED(LazyInitWithCallOnce(
- field_by_name_flag_, [this]() { return InitFieldByName(); }));
- auto it = field_by_name_.find(name);
- if (it != field_by_name_.end()) {
+ ICEBERG_ASSIGN_OR_RAISE(auto field_by_name, field_by_name_.Get(*this));
+ auto it = field_by_name.get().find(name);
+ if (it != field_by_name.get().end()) {
return it->second;
}
return std::nullopt;
}
- ICEBERG_RETURN_UNEXPECTED(LazyInitWithCallOnce(
- field_by_lowercase_name_flag_, [this]() { return
InitFieldByLowerCaseName(); }));
- auto it = field_by_lowercase_name_.find(StringUtils::ToLower(name));
- if (it != field_by_lowercase_name_.end()) {
+ ICEBERG_ASSIGN_OR_RAISE(auto field_by_lowercase_name,
+ field_by_lowercase_name_.Get(*this));
+ auto it = field_by_lowercase_name.get().find(StringUtils::ToLower(name));
+ if (it != field_by_lowercase_name.get().end()) {
return it->second;
}
return std::nullopt;
@@ -90,47 +88,44 @@ bool StructType::Equals(const Type& other) const {
const auto& struct_ = static_cast<const StructType&>(other);
return fields_ == struct_.fields_;
}
-Status StructType::InitFieldById() const {
- if (!field_by_id_.empty()) {
- return {};
- }
- for (const auto& field : fields_) {
- auto it = field_by_id_.try_emplace(field.field_id(), field);
+Result<std::unordered_map<int32_t, StructType::SchemaFieldConstRef>>
+StructType::InitFieldById(const StructType& self) {
+ std::unordered_map<int32_t, SchemaFieldConstRef> field_by_id;
+ for (const auto& field : self.fields_) {
+ auto it = field_by_id.try_emplace(field.field_id(), field);
if (!it.second) {
return InvalidSchema("Duplicate field id found: {} (prev name: {}, curr
name: {})",
field.field_id(), it.first->second.get().name(),
field.name());
}
}
- return {};
+ return field_by_id;
}
-Status StructType::InitFieldByName() const {
- if (!field_by_name_.empty()) {
- return {};
- }
- for (const auto& field : fields_) {
- auto it = field_by_name_.try_emplace(field.name(), field);
+Result<std::unordered_map<std::string_view, StructType::SchemaFieldConstRef>>
+StructType::InitFieldByName(const StructType& self) {
+ std::unordered_map<std::string_view, StructType::SchemaFieldConstRef>
field_by_name;
+ for (const auto& field : self.fields_) {
+ auto it = field_by_name.try_emplace(field.name(), field);
if (!it.second) {
return InvalidSchema("Duplicate field name found: {} (prev id: {}, curr
id: {})",
it.first->first, it.first->second.get().field_id(),
field.field_id());
}
}
- return {};
+ return field_by_name;
}
-Status StructType::InitFieldByLowerCaseName() const {
- if (!field_by_lowercase_name_.empty()) {
- return {};
- }
- for (const auto& field : fields_) {
+Result<std::unordered_map<std::string, StructType::SchemaFieldConstRef>>
+StructType::InitFieldByLowerCaseName(const StructType& self) {
+ std::unordered_map<std::string, SchemaFieldConstRef> field_by_lowercase_name;
+ for (const auto& field : self.fields_) {
auto it =
-
field_by_lowercase_name_.try_emplace(StringUtils::ToLower(field.name()), field);
+
field_by_lowercase_name.try_emplace(StringUtils::ToLower(field.name()), field);
if (!it.second) {
return InvalidSchema(
"Duplicate lowercase field name found: {} (prev id: {}, curr id:
{})",
it.first->first, it.first->second.get().field_id(),
field.field_id());
}
}
- return {};
+ return field_by_lowercase_name;
}
ListType::ListType(SchemaField element) : element_(std::move(element)) {
diff --git a/src/iceberg/type.h b/src/iceberg/type.h
index 2565268..49866c4 100644
--- a/src/iceberg/type.h
+++ b/src/iceberg/type.h
@@ -26,7 +26,6 @@
#include <array>
#include <cstdint>
#include <memory>
-#include <mutex>
#include <optional>
#include <span>
#include <string>
@@ -37,16 +36,10 @@
#include "iceberg/result.h"
#include "iceberg/schema_field.h"
#include "iceberg/util/formattable.h"
+#include "iceberg/util/lazy.h"
namespace iceberg {
-template <typename Func>
-Status LazyInitWithCallOnce(std::once_flag& flag, Func&& func) {
- Status status;
- std::call_once(flag, [&status, &func]() { status = func(); });
- return status;
-}
-
/// \brief Interface for a data type for a field.
class ICEBERG_EXPORT Type : public iceberg::util::Formattable {
public:
@@ -133,18 +126,17 @@ class ICEBERG_EXPORT StructType : public NestedType {
protected:
bool Equals(const Type& other) const override;
- Status InitFieldById() const;
- Status InitFieldByName() const;
- Status InitFieldByLowerCaseName() const;
+ static Result<std::unordered_map<int32_t, SchemaFieldConstRef>>
InitFieldById(
+ const StructType&);
+ static Result<std::unordered_map<std::string_view, SchemaFieldConstRef>>
+ InitFieldByName(const StructType&);
+ static Result<std::unordered_map<std::string, SchemaFieldConstRef>>
+ InitFieldByLowerCaseName(const StructType&);
std::vector<SchemaField> fields_;
- mutable std::unordered_map<int32_t, SchemaFieldConstRef> field_by_id_;
- mutable std::unordered_map<std::string_view, SchemaFieldConstRef>
field_by_name_;
- mutable std::unordered_map<std::string, SchemaFieldConstRef>
field_by_lowercase_name_;
-
- mutable std::once_flag field_by_id_flag_;
- mutable std::once_flag field_by_name_flag_;
- mutable std::once_flag field_by_lowercase_name_flag_;
+ Lazy<InitFieldById> field_by_id_;
+ Lazy<InitFieldByName> field_by_name_;
+ Lazy<InitFieldByLowerCaseName> field_by_lowercase_name_;
};
/// \brief A data type representing a list of values.
diff --git a/src/iceberg/util/lazy.h b/src/iceberg/util/lazy.h
new file mode 100644
index 0000000..530fb6c
--- /dev/null
+++ b/src/iceberg/util/lazy.h
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/util/lazy.h
+/// Lazy initialization utility.
+
+#include <concepts>
+#include <functional>
+#include <mutex>
+
+#include "iceberg/result.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+template <auto InitFunc>
+class Lazy {
+ template <typename R>
+ struct Trait;
+
+ template <typename R, typename... Args>
+ struct Trait<R (*)(Args...)> {
+ using ReturnType = R::value_type;
+ };
+
+ using T = Trait<decltype(InitFunc)>::ReturnType;
+
+ public:
+ template <typename... Args>
+ requires std::invocable<decltype(InitFunc), Args...> &&
+ std::same_as<std::invoke_result_t<decltype(InitFunc), Args...>,
Result<T>>
+ Result<std::reference_wrapper<T>> Get(Args&&... args) const {
+ Result<T> result;
+ std::call_once(flag_, [&result, this, &args...]() {
+ result = InitFunc(std::forward<Args>(args)...);
+ if (result) {
+ this->value_ = std::move(result.value());
+ }
+ });
+ ICEBERG_RETURN_UNEXPECTED(result);
+ return std::ref(value_);
+ }
+
+ private:
+ mutable T value_;
+ mutable std::once_flag flag_;
+};
+
+}; // namespace iceberg