gty404 commented on code in PR #486:
URL: https://github.com/apache/iceberg-cpp/pull/486#discussion_r2664328215
##########
src/iceberg/update/update_schema.cc:
##########
@@ -195,13 +391,191 @@ UpdateSchema&
UpdateSchema::UnionByNameWith(std::shared_ptr<Schema> new_schema)
UpdateSchema& UpdateSchema::SetIdentifierFields(
const std::span<std::string_view>& names) {
- identifier_field_names_ = names |
std::ranges::to<std::unordered_set<std::string>>();
+ identifier_field_names_ = names |
std::ranges::to<std::vector<std::string>>();
return *this;
}
Result<UpdateSchema::ApplyResult> UpdateSchema::Apply() {
- // TODO(Guotao Yu): Implement Apply
- return NotImplemented("UpdateSchema::Apply not implemented");
+ ICEBERG_RETURN_UNEXPECTED(CheckErrors());
+
+ // Validate existing identifier fields are not deleted
+ for (const auto& name : identifier_field_names_) {
+ ICEBERG_ASSIGN_OR_RAISE(auto field_opt, FindField(name));
+ if (field_opt.has_value()) {
+ const auto& field = field_opt->get();
+ auto field_id = field.field_id();
+
+ // Check the field itself is not deleted
+ ICEBERG_CHECK(!deletes_.contains(field_id),
+ "Cannot delete identifier field {}. To force deletion,
also call "
+ "SetIdentifierFields to update identifier fields.",
+ name);
+
+ // Check no parent of this field is deleted
+ auto parent_it = id_to_parent_.find(field_id);
+ while (parent_it != id_to_parent_.end()) {
+ int32_t parent_id = parent_it->second;
+ ICEBERG_ASSIGN_OR_RAISE(auto parent_field_opt,
schema_->FindFieldById(parent_id));
+ ICEBERG_CHECK(
+ !deletes_.contains(parent_id),
+ "Cannot delete field {} as it will delete nested identifier field
{}",
+ parent_field_opt.has_value() ?
std::string(parent_field_opt->get().name())
+ : std::to_string(parent_id),
+ name);
+ parent_it = id_to_parent_.find(parent_id);
+ }
+ }
+ }
+
+ // Apply schema changes using visitor pattern
+ // Create a temporary struct type from the schema to use with the visitor
+ auto schema_struct_type = std::make_shared<StructType>(
+ schema_->fields() | std::ranges::to<std::vector<SchemaField>>());
+
+ // Apply changes recursively using the visitor
+ ApplyChangesVisitor visitor(deletes_, updates_, parent_to_added_ids_);
+ ICEBERG_ASSIGN_OR_RAISE(auto new_type,
+ visitor.ApplyChanges(schema_struct_type,
kTableRootId));
+
+ // Cast result back to StructType and extract fields
+ auto new_struct_type = internal::checked_pointer_cast<StructType>(new_type);
+ std::vector<SchemaField> new_fields(new_struct_type->fields() |
+
std::ranges::to<std::vector<SchemaField>>());
+
+ // Convert identifier field names to IDs
+ auto temp_schema = std::make_shared<Schema>(new_fields);
+ std::vector<int32_t> fresh_identifier_ids;
+ for (const auto& name : identifier_field_names_) {
+ ICEBERG_ASSIGN_OR_RAISE(auto field_opt,
+ temp_schema->FindFieldByName(name,
case_sensitive_));
+ ICEBERG_CHECK(field_opt.has_value(),
+ "Cannot add field {} as an identifier field: not found in
current "
+ "schema or added columns",
+ name);
+ fresh_identifier_ids.push_back(field_opt->get().field_id());
+ }
+
+ // Create the new schema
+ ICEBERG_ASSIGN_OR_RAISE(
+ auto new_schema,
+ Schema::Make(std::move(new_fields), schema_->schema_id(),
fresh_identifier_ids));
+
+ return ApplyResult{.schema = std::move(new_schema),
+ .new_last_column_id = last_column_id_};
+}
+
+UpdateSchema& UpdateSchema::AddColumnInternal(std::optional<std::string_view>
parent,
+ std::string_view name, bool
is_optional,
+ std::shared_ptr<Type> type,
+ std::string_view doc) {
+ int32_t parent_id = kTableRootId;
+ std::string full_name;
+
+ // Handle parent field
+ if (parent.has_value() && !parent->empty()) {
Review Comment:
I think using optional is more clear, but I will check if parent is set but
empty, an error will be reported.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]