This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new 39a9c8b3 feat: add UpdateSchema interface skeleton (#460)
39a9c8b3 is described below
commit 39a9c8b3129770b465e0c21f51ac4d90c904d85b
Author: Guotao Yu <[email protected]>
AuthorDate: Wed Dec 31 16:46:58 2025 +0800
feat: add UpdateSchema interface skeleton (#460)
- Define `UpdateSchema` class interface with full API documentation
---
src/iceberg/CMakeLists.txt | 1 +
src/iceberg/meson.build | 1 +
src/iceberg/table.cc | 12 ++
src/iceberg/table.h | 6 +
src/iceberg/transaction.cc | 15 ++
src/iceberg/transaction.h | 4 +
src/iceberg/type_fwd.h | 1 +
src/iceberg/update/meson.build | 1 +
src/iceberg/update/pending_update.h | 1 +
src/iceberg/update/update_schema.cc | 207 +++++++++++++++++++++
src/iceberg/update/update_schema.h | 356 ++++++++++++++++++++++++++++++++++++
11 files changed, 605 insertions(+)
diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt
index bc7182ae..a6b836c4 100644
--- a/src/iceberg/CMakeLists.txt
+++ b/src/iceberg/CMakeLists.txt
@@ -83,6 +83,7 @@ set(ICEBERG_SOURCES
update/pending_update.cc
update/update_partition_spec.cc
update/update_properties.cc
+ update/update_schema.cc
update/update_sort_order.cc
util/bucket_util.cc
util/content_file_util.cc
diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build
index 55349d8d..34538bde 100644
--- a/src/iceberg/meson.build
+++ b/src/iceberg/meson.build
@@ -104,6 +104,7 @@ iceberg_sources = files(
'update/pending_update.cc',
'update/update_partition_spec.cc',
'update/update_properties.cc',
+ 'update/update_schema.cc',
'update/update_sort_order.cc',
'util/bucket_util.cc',
'util/content_file_util.cc',
diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc
index 6b4d317b..ee3ce594 100644
--- a/src/iceberg/table.cc
+++ b/src/iceberg/table.cc
@@ -32,6 +32,7 @@
#include "iceberg/transaction.h"
#include "iceberg/update/update_partition_spec.h"
#include "iceberg/update/update_properties.h"
+#include "iceberg/update/update_schema.h"
#include "iceberg/util/macros.h"
namespace iceberg {
@@ -171,6 +172,13 @@ Result<std::shared_ptr<UpdateSortOrder>>
Table::NewUpdateSortOrder() {
return transaction->NewUpdateSortOrder();
}
+/// Starts a dedicated auto-commit update transaction on this table and hands back the
+/// UpdateSchema created by that transaction. Any error from creating the transaction is
+/// propagated to the caller.
+Result<std::shared_ptr<UpdateSchema>> Table::NewUpdateSchema() {
+  ICEBERG_ASSIGN_OR_RAISE(auto txn,
+                          Transaction::Make(shared_from_this(), Transaction::Kind::kUpdate,
+                                            /*auto_commit=*/true));
+  return txn->NewUpdateSchema();
+}
+
Result<std::shared_ptr<StagedTable>> StagedTable::Make(
TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata,
std::string metadata_location, std::shared_ptr<FileIO> io,
@@ -221,4 +229,8 @@ Result<std::shared_ptr<UpdateProperties>>
StaticTable::NewUpdateProperties() {
return NotSupported("Cannot create an update properties for a static table");
}
+/// Schema evolution is unavailable on a static table: this override unconditionally
+/// returns a NotSupported error.
+Result<std::shared_ptr<UpdateSchema>> StaticTable::NewUpdateSchema() {
+  return NotSupported("Cannot create an update schema for a static table");
+}
+
} // namespace iceberg
diff --git a/src/iceberg/table.h b/src/iceberg/table.h
index 30ad14c1..31139585 100644
--- a/src/iceberg/table.h
+++ b/src/iceberg/table.h
@@ -140,6 +140,10 @@ class ICEBERG_EXPORT Table : public
std::enable_shared_from_this<Table> {
/// changes.
virtual Result<std::shared_ptr<UpdateSortOrder>> NewUpdateSortOrder();
+ /// \brief Create a new UpdateSchema to alter the columns of this table and commit the
+ /// changes.
+ ///
+ /// \return The new UpdateSchema, or the error produced while setting it up.
+ virtual Result<std::shared_ptr<UpdateSchema>> NewUpdateSchema();
+
protected:
Table(TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata,
std::string metadata_location, std::shared_ptr<FileIO> io,
@@ -187,6 +191,8 @@ class ICEBERG_EXPORT StaticTable final : public Table {
Result<std::shared_ptr<UpdateProperties>> NewUpdateProperties() override;
+ /// \brief Not supported for static tables; always returns a NotSupported error.
+ Result<std::shared_ptr<UpdateSchema>> NewUpdateSchema() override;
+
private:
using Table::Table;
};
diff --git a/src/iceberg/transaction.cc b/src/iceberg/transaction.cc
index c8446e8b..6641a1af 100644
--- a/src/iceberg/transaction.cc
+++ b/src/iceberg/transaction.cc
@@ -22,6 +22,7 @@
#include <memory>
#include "iceberg/catalog.h"
+#include "iceberg/schema.h"
#include "iceberg/table.h"
#include "iceberg/table_metadata.h"
#include "iceberg/table_requirement.h"
@@ -30,6 +31,7 @@
#include "iceberg/update/pending_update.h"
#include "iceberg/update/update_partition_spec.h"
#include "iceberg/update/update_properties.h"
+#include "iceberg/update/update_schema.h"
#include "iceberg/update/update_sort_order.h"
#include "iceberg/util/checked_cast.h"
#include "iceberg/util/macros.h"
@@ -105,6 +107,12 @@ Status Transaction::Apply(PendingUpdate& update) {
metadata_builder_->AddPartitionSpec(std::move(result.spec));
}
} break;
+ case PendingUpdate::Kind::kUpdateSchema: {
+ auto& update_schema = internal::checked_cast<UpdateSchema&>(update);
+ ICEBERG_ASSIGN_OR_RAISE(auto result, update_schema.Apply());
+ metadata_builder_->SetCurrentSchema(std::move(result.schema),
+ result.new_last_column_id);
+ } break;
default:
return NotSupported("Unsupported pending update: {}",
static_cast<int32_t>(update.kind()));
@@ -178,4 +186,11 @@ Result<std::shared_ptr<UpdateSortOrder>>
Transaction::NewUpdateSortOrder() {
return update_sort_order;
}
+/// Builds an UpdateSchema bound to this transaction, registers it through AddUpdate(),
+/// and returns it. A failure in either step is returned to the caller instead.
+Result<std::shared_ptr<UpdateSchema>> Transaction::NewUpdateSchema() {
+  ICEBERG_ASSIGN_OR_RAISE(std::shared_ptr<UpdateSchema> pending_schema_update,
+                          UpdateSchema::Make(shared_from_this()));
+  // Registration must succeed before the update is exposed to the caller.
+  ICEBERG_RETURN_UNEXPECTED(AddUpdate(pending_schema_update));
+  return pending_schema_update;
+}
+
} // namespace iceberg
diff --git a/src/iceberg/transaction.h b/src/iceberg/transaction.h
index 87a2139b..ea918a17 100644
--- a/src/iceberg/transaction.h
+++ b/src/iceberg/transaction.h
@@ -68,6 +68,10 @@ class ICEBERG_EXPORT Transaction : public
std::enable_shared_from_this<Transacti
/// changes.
Result<std::shared_ptr<UpdateSortOrder>> NewUpdateSortOrder();
+ /// \brief Create a new UpdateSchema to alter the columns of this table and commit the
+ /// changes.
+ ///
+ /// \return The new UpdateSchema, or the error produced while creating or registering it.
+ Result<std::shared_ptr<UpdateSchema>> NewUpdateSchema();
+
private:
Transaction(std::shared_ptr<Table> table, Kind kind, bool auto_commit,
std::unique_ptr<TableMetadataBuilder> metadata_builder);
diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h
index 65afeb87..2daf39e6 100644
--- a/src/iceberg/type_fwd.h
+++ b/src/iceberg/type_fwd.h
@@ -190,6 +190,7 @@ class Transaction;
class PendingUpdate;
class UpdatePartitionSpec;
class UpdateProperties;
+class UpdateSchema;
class UpdateSortOrder;
///
----------------------------------------------------------------------------
diff --git a/src/iceberg/update/meson.build b/src/iceberg/update/meson.build
index 3fdfda98..e4c786f4 100644
--- a/src/iceberg/update/meson.build
+++ b/src/iceberg/update/meson.build
@@ -19,6 +19,7 @@ install_headers(
[
'pending_update.h',
'update_partition_spec.h',
+ 'update_schema.h',
'update_sort_order.h',
'update_properties.h',
],
diff --git a/src/iceberg/update/pending_update.h
b/src/iceberg/update/pending_update.h
index 95580f40..90723987 100644
--- a/src/iceberg/update/pending_update.h
+++ b/src/iceberg/update/pending_update.h
@@ -44,6 +44,7 @@ class ICEBERG_EXPORT PendingUpdate : public ErrorCollector {
enum class Kind : uint8_t {
kUpdatePartitionSpec,
kUpdateProperties,
+ // Tag reported by UpdateSchema::kind().
+ // NOTE(review): inserted ahead of kUpdateSortOrder, so that enumerator's underlying
+ // value shifts by one - confirm these numeric values are never persisted.
+ kUpdateSchema,
kUpdateSortOrder,
};
diff --git a/src/iceberg/update/update_schema.cc
b/src/iceberg/update/update_schema.cc
new file mode 100644
index 00000000..14b962bd
--- /dev/null
+++ b/src/iceberg/update/update_schema.cc
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/update/update_schema.h"
+
+#include <memory>
+#include <optional>
+#include <ranges>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <utility>
+
+#include "iceberg/schema.h"
+#include "iceberg/table_metadata.h"
+#include "iceberg/transaction.h"
+#include "iceberg/type.h"
+#include "iceberg/util/error_collector.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+/// Factory for UpdateSchema.
+///
+/// ICEBERG_PRECHECK guards against a null transaction. The constructor is private, so
+/// the instance is created with an explicit `new` and immediately owned by a shared_ptr.
+Result<std::shared_ptr<UpdateSchema>> UpdateSchema::Make(
+    std::shared_ptr<Transaction> transaction) {
+  ICEBERG_PRECHECK(transaction != nullptr,
+                   "Cannot create UpdateSchema without transaction");
+  std::shared_ptr<UpdateSchema> update(new UpdateSchema(std::move(transaction)));
+  return update;
+}
+
+/// Captures the state of the table that the schema update starts from.
+///
+/// Reads the transaction's current metadata and stores the last column id, the current
+/// schema, and the schema's identifier field names. A failed lookup is recorded with
+/// AddError() and stops further initialization; the constructor itself never fails.
+///
+/// \param transaction Transaction this update belongs to (Make() rejects null).
+UpdateSchema::UpdateSchema(std::shared_ptr<Transaction> transaction)
+    : PendingUpdate(std::move(transaction)) {
+  const TableMetadata& base_metadata = transaction_->current();
+
+  // Record the last column id before any early return so the member is never left
+  // indeterminate when one of the lookups below fails.
+  last_column_id_ = base_metadata.last_column_id;
+
+  // Get the current schema
+  auto schema_result = base_metadata.Schema();
+  if (!schema_result.has_value()) {
+    AddError(schema_result.error());
+    return;
+  }
+  schema_ = std::move(schema_result.value());
+
+  // Initialize identifier field names from the current schema
+  auto identifier_names_result = schema_->IdentifierFieldNames();
+  if (!identifier_names_result.has_value()) {
+    AddError(identifier_names_result.error());
+    return;
+  }
+  identifier_field_names_ = identifier_names_result.value() |
+                            std::ranges::to<std::unordered_set<std::string>>();
+}
+
+// Out-of-line definition of the destructor declared in update_schema.h.
+UpdateSchema::~UpdateSchema() = default;
+
+/// Turns on the allow-incompatible-changes flag for this update. There is no way to
+/// switch it back off through this method.
+UpdateSchema& UpdateSchema::AllowIncompatibleChanges() {
+  allow_incompatible_changes_ = true;
+  return *this;  // enable call chaining
+}
+
+/// Stores the requested case-sensitivity setting on this update.
+UpdateSchema& UpdateSchema::CaseSensitive(bool case_sensitive) {
+  case_sensitive_ = case_sensitive;
+  return *this;  // enable call chaining
+}
+
+/// Adds an optional top-level column.
+///
+/// A name containing '.' is rejected through ICEBERG_BUILDER_CHECK because it would be
+/// ambiguous with a nested path; everything else is forwarded to AddColumnInternal()
+/// with no parent.
+UpdateSchema& UpdateSchema::AddColumn(std::string_view name, std::shared_ptr<Type> type,
+                                      std::string_view doc) {
+  ICEBERG_BUILDER_CHECK(name.find('.') == std::string_view::npos,
+                        "Cannot add column with ambiguous name: {}, use "
+                        "AddColumn(parent, name, type, doc)",
+                        name);
+  return AddColumnInternal(/*parent=*/std::nullopt, name, /*is_optional=*/true,
+                           std::move(type), doc);
+}
+
+/// Adds an optional column under `parent` by forwarding to AddColumnInternal().
+/// No validation of `name` happens here.
+UpdateSchema& UpdateSchema::AddColumn(std::optional<std::string_view> parent,
+                                      std::string_view name, std::shared_ptr<Type> type,
+                                      std::string_view doc) {
+  // `parent` is a trivially copyable view, so it is passed along by plain copy.
+  return AddColumnInternal(parent, name, /*is_optional=*/true, std::move(type), doc);
+}
+
+/// Adds a required top-level column.
+///
+/// A name containing '.' is rejected through ICEBERG_BUILDER_CHECK because it would be
+/// ambiguous with a nested path; everything else is forwarded to AddColumnInternal()
+/// with no parent.
+UpdateSchema& UpdateSchema::AddRequiredColumn(std::string_view name,
+                                              std::shared_ptr<Type> type,
+                                              std::string_view doc) {
+  ICEBERG_BUILDER_CHECK(name.find('.') == std::string_view::npos,
+                        "Cannot add column with ambiguous name: {}, use "
+                        "AddRequiredColumn(parent, name, type, doc)",
+                        name);
+  return AddColumnInternal(/*parent=*/std::nullopt, name, /*is_optional=*/false,
+                           std::move(type), doc);
+}
+
+/// Adds a required column under `parent` by forwarding to AddColumnInternal().
+/// No validation of `name` happens here.
+UpdateSchema& UpdateSchema::AddRequiredColumn(std::optional<std::string_view> parent,
+                                              std::string_view name,
+                                              std::shared_ptr<Type> type,
+                                              std::string_view doc) {
+  // `parent` is a trivially copyable view, so it is passed along by plain copy.
+  return AddColumnInternal(parent, name, /*is_optional=*/false, std::move(type), doc);
+}
+
+/// Placeholder: changing a column's type is not implemented yet, so the call only
+/// records a NotImplemented error on this update and ignores its arguments.
+UpdateSchema& UpdateSchema::UpdateColumn(
+    [[maybe_unused]] std::string_view name,
+    [[maybe_unused]] std::shared_ptr<PrimitiveType> new_type) {
+  // TODO(Guotao Yu): Implement UpdateColumn
+  AddError(NotImplemented("UpdateSchema::UpdateColumn not implemented"));
+  return *this;
+}
+
+/// Placeholder: updating a column's documentation is not implemented yet, so the call
+/// only records a NotImplemented error on this update and ignores its arguments.
+UpdateSchema& UpdateSchema::UpdateColumnDoc([[maybe_unused]] std::string_view name,
+                                            [[maybe_unused]] std::string_view new_doc) {
+  // TODO(Guotao Yu): Implement UpdateColumnDoc
+  AddError(NotImplemented("UpdateSchema::UpdateColumnDoc not implemented"));
+  return *this;
+}
+
+/// Shared back end of every AddColumn()/AddRequiredColumn() overload.
+///
+/// Placeholder: the real work (locating the parent, validating, and so on) is still to
+/// be written, so the call only records a NotImplemented error and ignores its
+/// arguments.
+UpdateSchema& UpdateSchema::AddColumnInternal(
+    [[maybe_unused]] std::optional<std::string_view> parent,
+    [[maybe_unused]] std::string_view name, [[maybe_unused]] bool is_optional,
+    [[maybe_unused]] std::shared_ptr<Type> type, [[maybe_unused]] std::string_view doc) {
+  // TODO(Guotao Yu): Implement AddColumnInternal logic
+  AddError(NotImplemented("UpdateSchema::AddColumnInternal not implemented"));
+  return *this;
+}
+
+/// Placeholder: renaming is not implemented yet, so the call only records a
+/// NotImplemented error on this update and ignores its arguments.
+UpdateSchema& UpdateSchema::RenameColumn([[maybe_unused]] std::string_view name,
+                                         [[maybe_unused]] std::string_view new_name) {
+  // TODO(Guotao Yu): Implement RenameColumn
+  AddError(NotImplemented("UpdateSchema::RenameColumn not implemented"));
+  return *this;
+}
+
+/// Placeholder: relaxing a column to optional is not implemented yet, so the call only
+/// records a NotImplemented error on this update and ignores its argument.
+UpdateSchema& UpdateSchema::MakeColumnOptional([[maybe_unused]] std::string_view name) {
+  // TODO(Guotao Yu): Implement MakeColumnOptional
+  AddError(NotImplemented("UpdateSchema::MakeColumnOptional not implemented"));
+  return *this;
+}
+
+/// Placeholder: marking a column required is not implemented yet, so the call only
+/// records a NotImplemented error on this update and ignores its argument.
+UpdateSchema& UpdateSchema::RequireColumn([[maybe_unused]] std::string_view name) {
+  // TODO(Guotao Yu): Implement RequireColumn
+  AddError(NotImplemented("UpdateSchema::RequireColumn not implemented"));
+  return *this;
+}
+
+/// Placeholder: deleting a column is not implemented yet, so the call only records a
+/// NotImplemented error on this update and ignores its argument.
+UpdateSchema& UpdateSchema::DeleteColumn([[maybe_unused]] std::string_view name) {
+  // TODO(Guotao Yu): Implement DeleteColumn
+  AddError(NotImplemented("UpdateSchema::DeleteColumn not implemented"));
+  return *this;
+}
+
+/// Placeholder: moving a column to the front is not implemented yet, so the call only
+/// records a NotImplemented error on this update and ignores its argument.
+UpdateSchema& UpdateSchema::MoveFirst([[maybe_unused]] std::string_view name) {
+  // TODO(Guotao Yu): Implement MoveFirst
+  AddError(NotImplemented("UpdateSchema::MoveFirst not implemented"));
+  return *this;
+}
+
+/// Placeholder: moving a column before another is not implemented yet, so the call only
+/// records a NotImplemented error on this update and ignores its arguments.
+UpdateSchema& UpdateSchema::MoveBefore([[maybe_unused]] std::string_view name,
+                                       [[maybe_unused]] std::string_view before_name) {
+  // TODO(Guotao Yu): Implement MoveBefore
+  AddError(NotImplemented("UpdateSchema::MoveBefore not implemented"));
+  return *this;
+}
+
+/// Placeholder: moving a column after another is not implemented yet, so the call only
+/// records a NotImplemented error on this update and ignores its arguments.
+UpdateSchema& UpdateSchema::MoveAfter([[maybe_unused]] std::string_view name,
+                                      [[maybe_unused]] std::string_view after_name) {
+  // TODO(Guotao Yu): Implement MoveAfter
+  AddError(NotImplemented("UpdateSchema::MoveAfter not implemented"));
+  return *this;
+}
+
+/// Placeholder: building a union schema is not implemented yet, so the call only
+/// records a NotImplemented error on this update and ignores its argument.
+UpdateSchema& UpdateSchema::UnionByNameWith(
+    [[maybe_unused]] std::shared_ptr<Schema> new_schema) {
+  // TODO(Guotao Yu): Implement UnionByNameWith
+  AddError(NotImplemented("UpdateSchema::UnionByNameWith not implemented"));
+  return *this;
+}
+
+/// Replaces the stored identifier field names with an owning copy of `names`.
+///
+/// The names are collected into an unordered_set, so repeated entries collapse into
+/// one. No check against the schema is made here.
+UpdateSchema& UpdateSchema::SetIdentifierFields(
+    const std::span<std::string_view>& names) {
+  using NameSet = std::unordered_set<std::string>;
+  identifier_field_names_ = std::ranges::to<NameSet>(names);
+  return *this;
+}
+
+/// Placeholder: computing the updated schema is not implemented yet, so every call
+/// returns a NotImplemented error without touching any state.
+Result<UpdateSchema::ApplyResult> UpdateSchema::Apply() {
+  // TODO(Guotao Yu): Implement Apply
+  return NotImplemented("UpdateSchema::Apply not implemented");
+}
+
+} // namespace iceberg
diff --git a/src/iceberg/update/update_schema.h
b/src/iceberg/update/update_schema.h
new file mode 100644
index 00000000..bed2bfeb
--- /dev/null
+++ b/src/iceberg/update/update_schema.h
@@ -0,0 +1,356 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/update/update_schema.h
+/// API for schema evolution.
+
+#include <memory>
+#include <optional>
+#include <span>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/result.h"
+#include "iceberg/type_fwd.h"
+#include "iceberg/update/pending_update.h"
+
+namespace iceberg {
+
+/// \brief API for schema evolution.
+///
+/// When committing, these changes will be applied to the current table metadata.
+/// Commit conflicts will not be resolved and will result in a CommitFailed error.
+///
+/// Every mutator returns a reference to this object so calls can be chained. Problems
+/// found by a mutator are recorded on this object rather than thrown.
+///
+/// TODO(Guotao Yu): Add support for V3 default values when adding columns. Currently, all
+/// added columns use null as the default value, but Iceberg V3 supports custom
+/// default values for new columns.
+class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
+ public:
+  /// \brief Create an UpdateSchema bound to the given transaction.
+  ///
+  /// \param transaction Transaction that owns this update; must not be null.
+  /// \return The new UpdateSchema, or an error if transaction is null.
+  static Result<std::shared_ptr<UpdateSchema>> Make(
+      std::shared_ptr<Transaction> transaction);
+
+  ~UpdateSchema() override;
+
+  /// \brief Allow incompatible changes to the schema.
+  ///
+  /// Incompatible changes can cause failures when attempting to read older data files.
+  /// For example, adding a required column and attempting to read data files without
+  /// that column will cause a failure. However, if there are no data files that are
+  /// not compatible with the change, it can be allowed.
+  ///
+  /// This option allows incompatible changes to be made to a schema. This should be
+  /// used when the caller has validated that the change will not break. For example,
+  /// if a column is added as optional but always populated and data older than the
+  /// column addition has been deleted from the table, this can be used with
+  /// RequireColumn() to mark the column required.
+  ///
+  /// \return Reference to this for method chaining.
+  UpdateSchema& AllowIncompatibleChanges();
+
+  /// \brief Add a new optional top-level column with documentation.
+  ///
+  /// Because "." may be interpreted as a column path separator or may be used in
+  /// field names, it is not allowed in names passed to this method. To add to nested
+  /// structures or to add fields with names that contain ".", use AddColumn(parent,
+  /// name, type, doc).
+  ///
+  /// If type is a nested type, its field IDs are reassigned when added to the
+  /// existing schema.
+  ///
+  /// The added column will be optional with a null default value.
+  ///
+  /// \param name Name for the new column.
+  /// \param type Type for the new column.
+  /// \param doc Documentation string for the new column.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name contains ".".
+  UpdateSchema& AddColumn(std::string_view name, std::shared_ptr<Type> type,
+                          std::string_view doc = "");
+
+  /// \brief Add a new optional column to a nested struct with documentation.
+  ///
+  /// The parent name is used to find the parent using Schema::FindFieldByName(). If
+  /// the parent name is std::nullopt or empty, the new column will be added to the root
+  /// as a top-level column. If parent identifies a struct, a new column is added to that
+  /// struct. If it identifies a list, the column is added to the list element struct,
+  /// and if it identifies a map, the new column is added to the map's value struct.
+  ///
+  /// The given name is used to name the new column and names containing "." are not
+  /// handled differently.
+  ///
+  /// If type is a nested type, its field IDs are reassigned when added to the
+  /// existing schema.
+  ///
+  /// The added column will be optional with a null default value.
+  ///
+  /// \param parent Name of the parent struct to which the column will be added.
+  /// \param name Name for the new column.
+  /// \param type Type for the new column.
+  /// \param doc Documentation string for the new column.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if parent doesn't identify a struct.
+  UpdateSchema& AddColumn(std::optional<std::string_view> parent, std::string_view name,
+                          std::shared_ptr<Type> type, std::string_view doc = "");
+
+  /// \brief Add a new required top-level column with documentation.
+  ///
+  /// Adding a required column without a default is an incompatible change that can
+  /// break reading older data. To suppress the error reported when an incompatible
+  /// change is detected, call AllowIncompatibleChanges().
+  ///
+  /// Because "." may be interpreted as a column path separator or may be used in
+  /// field names, it is not allowed in names passed to this method. To add to nested
+  /// structures or to add fields with names that contain ".", use
+  /// AddRequiredColumn(parent, name, type, doc).
+  ///
+  /// If type is a nested type, its field IDs are reassigned when added to the
+  /// existing schema.
+  ///
+  /// \param name Name for the new column.
+  /// \param type Type for the new column.
+  /// \param doc Documentation string for the new column.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name contains ".".
+  UpdateSchema& AddRequiredColumn(std::string_view name, std::shared_ptr<Type> type,
+                                  std::string_view doc = "");
+
+  /// \brief Add a new required column to a nested struct with documentation.
+  ///
+  /// Adding a required column without a default is an incompatible change that can
+  /// break reading older data. To suppress the error reported when an incompatible
+  /// change is detected, call AllowIncompatibleChanges().
+  ///
+  /// The parent name is used to find the parent using Schema::FindFieldByName(). If
+  /// the parent name is std::nullopt or empty, the new column will be added to the root
+  /// as a top-level column. If parent identifies a struct, a new column is added to that
+  /// struct. If it identifies a list, the column is added to the list element struct,
+  /// and if it identifies a map, the new column is added to the map's value struct.
+  ///
+  /// The given name is used to name the new column and names containing "." are not
+  /// handled differently.
+  ///
+  /// If type is a nested type, its field IDs are reassigned when added to the
+  /// existing schema.
+  ///
+  /// \param parent Name of the parent struct to which the column will be added.
+  /// \param name Name for the new column.
+  /// \param type Type for the new column.
+  /// \param doc Documentation string for the new column.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if parent doesn't identify a struct.
+  UpdateSchema& AddRequiredColumn(std::optional<std::string_view> parent,
+                                  std::string_view name, std::shared_ptr<Type> type,
+                                  std::string_view doc = "");
+
+  /// \brief Rename a column in the schema.
+  ///
+  /// The name is used to find the column to rename using Schema::FindFieldByName().
+  ///
+  /// The new name may contain "." and such names are not parsed or handled
+  /// differently.
+  ///
+  /// Columns may be updated and renamed in the same schema update.
+  ///
+  /// \param name Name of the column to rename.
+  /// \param new_name Replacement name for the column.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name doesn't identify a column in the
+  ///       schema or if this change conflicts with other additions, renames, or updates.
+  UpdateSchema& RenameColumn(std::string_view name, std::string_view new_name);
+
+  /// \brief Update a column in the schema to a new primitive type.
+  ///
+  /// The name is used to find the column to update using Schema::FindFieldByName().
+  ///
+  /// Only updates that widen types are allowed.
+  ///
+  /// Columns may be updated and renamed in the same schema update.
+  ///
+  /// \param name Name of the column to update.
+  /// \param new_type Replacement type for the column (must be primitive).
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name doesn't identify a column in the
+  ///       schema, if this change introduces a type incompatibility, or if it conflicts
+  ///       with other additions, renames, or updates.
+  UpdateSchema& UpdateColumn(std::string_view name,
+                             std::shared_ptr<PrimitiveType> new_type);
+
+  /// \brief Update the documentation string for a column.
+  ///
+  /// The name is used to find the column to update using Schema::FindFieldByName().
+  ///
+  /// \param name Name of the column to update the documentation string for.
+  /// \param new_doc Replacement documentation string for the column.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name doesn't identify a column in the
+  ///       schema or if the column will be deleted.
+  UpdateSchema& UpdateColumnDoc(std::string_view name, std::string_view new_doc);
+
+  /// \brief Update a column to be optional.
+  ///
+  /// \param name Name of the column to mark optional.
+  /// \return Reference to this for method chaining.
+  UpdateSchema& MakeColumnOptional(std::string_view name);
+
+  /// \brief Update a column to be required.
+  ///
+  /// This is an incompatible change that can break reading older data. An error will
+  /// be reported unless AllowIncompatibleChanges() has been called.
+  ///
+  /// \param name Name of the column to mark required.
+  /// \return Reference to this for method chaining.
+  UpdateSchema& RequireColumn(std::string_view name);
+
+  /// \brief Delete a column in the schema.
+  ///
+  /// The name is used to find the column to delete using Schema::FindFieldByName().
+  ///
+  /// \param name Name of the column to delete.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name doesn't identify a column in the
+  ///       schema or if this change conflicts with other additions, renames, or updates.
+  UpdateSchema& DeleteColumn(std::string_view name);
+
+  /// \brief Move a column from its current position to the start of the schema or its
+  /// parent struct.
+  ///
+  /// \param name Name of the column to move.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name doesn't identify a column in the
+  ///       schema or if this change conflicts with other changes.
+  UpdateSchema& MoveFirst(std::string_view name);
+
+  /// \brief Move a column from its current position to directly before a reference
+  /// column.
+  ///
+  /// The name is used to find the column to move using Schema::FindFieldByName(). If
+  /// the name identifies a nested column, it can only be moved within the nested
+  /// struct that contains it.
+  ///
+  /// \param name Name of the column to move.
+  /// \param before_name Name of the reference column.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name doesn't identify a column in the
+  ///       schema or if this change conflicts with other changes.
+  UpdateSchema& MoveBefore(std::string_view name, std::string_view before_name);
+
+  /// \brief Move a column from its current position to directly after a reference
+  /// column.
+  ///
+  /// The name is used to find the column to move using Schema::FindFieldByName(). If
+  /// the name identifies a nested column, it can only be moved within the nested
+  /// struct that contains it.
+  ///
+  /// \param name Name of the column to move.
+  /// \param after_name Name of the reference column.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name doesn't identify a column in the
+  ///       schema or if this change conflicts with other changes.
+  UpdateSchema& MoveAfter(std::string_view name, std::string_view after_name);
+
+  /// \brief Applies all field additions and updates from the provided new schema to
+  /// the existing schema to create a union schema.
+  ///
+  /// For fields with the same canonical name in both schemas, a type change must be a
+  /// widening that UpdateColumn() supports. Differences in type are ignored if the new
+  /// type is narrower than the existing type (e.g. long to int, double to float).
+  ///
+  /// Only supports turning a previously required field into an optional one if it is
+  /// marked optional in the provided new schema using MakeColumnOptional().
+  ///
+  /// Only supports updating existing field docs with field docs from the provided
+  /// new schema using UpdateColumnDoc().
+  ///
+  /// \param new_schema A schema used in conjunction with the existing schema to
+  ///        create a union schema.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidState will be reported if it encounters errors during provided schema
+  ///       traversal.
+  /// \note InvalidArgument will be reported if name doesn't identify a column in the
+  ///       schema, if this change introduces a type incompatibility, or if it conflicts
+  ///       with other additions, renames, or updates.
+  UpdateSchema& UnionByNameWith(std::shared_ptr<Schema> new_schema);
+
+  /// \brief Set the identifier fields given a set of field names.
+  ///
+  /// Because identifier fields are unique, duplicated names will be ignored. See
+  /// Schema::identifier_field_ids() to learn more about Iceberg identifier.
+  ///
+  /// NOTE(review): the parameter is a reference to a span of mutable string_view, so
+  /// read-only ranges cannot be passed; std::span<const std::string_view> by value would
+  /// be more permissive but must be changed together with the definition.
+  ///
+  /// \param names Names of the columns to set as identifier fields.
+  /// \return Reference to this for method chaining.
+  UpdateSchema& SetIdentifierFields(const std::span<std::string_view>& names);
+
+  /// \brief Determines if the case of schema needs to be considered when comparing
+  /// column names.
+  ///
+  /// \param case_sensitive When false case is not considered in column name
+  ///        comparisons.
+  /// \return Reference to this for method chaining.
+  UpdateSchema& CaseSensitive(bool case_sensitive);
+
+  /// \brief Identifies this pending update as a schema update.
+  Kind kind() const final { return Kind::kUpdateSchema; }
+
+  /// \brief Outcome of Apply().
+  struct ApplyResult {
+    /// Schema produced by applying the pending changes.
+    std::shared_ptr<Schema> schema;
+    /// Last column id to record together with the new schema.
+    int32_t new_last_column_id;
+  };
+
+  /// \brief Apply the pending changes to the original schema and return the result.
+  ///
+  /// This does not result in a permanent update.
+  ///
+  /// \return The result Schema and last column id when all pending updates are applied.
+  Result<ApplyResult> Apply();
+
+ private:
+  explicit UpdateSchema(std::shared_ptr<Transaction> transaction);
+
+  /// \brief Internal implementation for adding a column with full control.
+  ///
+  /// \param parent Optional parent field name (nullopt for top-level).
+  /// \param name Name for the new column.
+  /// \param is_optional Whether the column is optional.
+  /// \param type Type for the new column.
+  /// \param doc Optional documentation string.
+  /// \return Reference to this for method chaining.
+  UpdateSchema& AddColumnInternal(std::optional<std::string_view> parent,
+                                  std::string_view name, bool is_optional,
+                                  std::shared_ptr<Type> type, std::string_view doc);
+
+  // Internal state
+  /// Schema the update starts from; stays null if it could not be loaded.
+  std::shared_ptr<Schema> schema_;
+  /// Last column id of the base metadata. Defaulted so it is never indeterminate, even
+  /// if the constructor bails out early.
+  int32_t last_column_id_{0};
+  bool allow_incompatible_changes_{false};
+  bool case_sensitive_{true};
+  /// Names of the identifier fields, owned as strings.
+  std::unordered_set<std::string> identifier_field_names_;
+};
+
+} // namespace iceberg