wgtmac commented on code in PR #345:
URL: https://github.com/apache/iceberg-cpp/pull/345#discussion_r2567642216


##########
src/iceberg/table_metadata.cc:
##########
@@ -361,7 +403,68 @@ TableMetadataBuilder& 
TableMetadataBuilder::SetDefaultSortOrder(int32_t order_id
 
 TableMetadataBuilder& TableMetadataBuilder::AddSortOrder(
     std::shared_ptr<SortOrder> order) {
-  throw IcebergError(std::format("{} not implemented", __FUNCTION__));
+  int32_t new_order_id = impl_->reuseOrCreateNewSortOrderId(*order);

Review Comment:
   What about implementing `SetDefaultSortOrder`s as well? The logic below 
should be reused for them as well.



##########
src/iceberg/table_metadata.cc:
##########
@@ -361,7 +403,68 @@ TableMetadataBuilder& 
TableMetadataBuilder::SetDefaultSortOrder(int32_t order_id
 
 TableMetadataBuilder& TableMetadataBuilder::AddSortOrder(
     std::shared_ptr<SortOrder> order) {
-  throw IcebergError(std::format("{} not implemented", __FUNCTION__));
+  int32_t new_order_id = impl_->reuseOrCreateNewSortOrderId(*order);
+
+  if (impl_->sort_orders_by_id.find(new_order_id) != 
impl_->sort_orders_by_id.end()) {
+    // update last_added_order_id if the order was added in this set of 
changes (since it
+    // is now the last)
+    bool is_new_order = false;
+    for (const auto& change : impl_->changes) {
+      auto* add_sort_order = dynamic_cast<table::AddSortOrder*>(change.get());
+      if (add_sort_order && add_sort_order->sort_order()->order_id() == 
new_order_id) {
+        is_new_order = true;
+        break;
+      }
+    }
+    impl_->last_added_order_id =
+        is_new_order ? std::make_optional(new_order_id) : std::nullopt;
+    return *this;
+  }
+
+  // Get current schema for validation
+  auto schema_result = impl_->metadata.Schema();
+  if (!schema_result) {
+    impl_->errors.emplace_back(
+        ErrorKind::kInvalidArgument,
+        std::format("Cannot find current schema: {}", 
schema_result.error().message));
+    return *this;
+  }
+
+  auto schema = schema_result.value();
+
+  // Validate the sort order against the schema
+  auto validate_status = order->Validate(*schema);
+  if (!validate_status) {
+    impl_->errors.emplace_back(
+        ErrorKind::kInvalidArgument,
+        std::format("Sort order validation failed: {}", 
validate_status.error().message));
+    return *this;
+  }

Review Comment:
   ```suggestion
     // Get current schema and validate the sort order against it
     BUILDER_ASSIGN_OR_RETURN(auto schema, impl_->metadata.Schema());
     BUILDER_RETURN_IF_ERROR(order->Validate(*schema));
   ```
   
   Of course you need to define these to the top:
   ```cpp
   #define BUILDER_RETURN_IF_ERROR(result)                         \
     if (auto&& result_name = result; !result_name) [[unlikely]] { \
       impl_->errors.emplace_back(std::move(result_name.error())); \
       return *this;                                               \
     }
   
   #define BUILDER_ASSIGN_OR_RETURN_IMPL(result_name, lhs, rexpr) \
     auto&& result_name = (rexpr);                                \
     BUILDER_RETURN_IF_ERROR(result_name)                         \
     lhs = std::move(result_name.value());
   
   #define BUILDER_ASSIGN_OR_RETURN(lhs, rexpr)                                 
            \
     BUILDER_ASSIGN_OR_RETURN_IMPL(ICEBERG_ASSIGN_OR_RAISE_NAME(result_, 
__COUNTER__), lhs, \
                                   rexpr)
   ```



##########
src/iceberg/table_metadata.cc:
##########
@@ -361,7 +403,68 @@ TableMetadataBuilder& 
TableMetadataBuilder::SetDefaultSortOrder(int32_t order_id
 
 TableMetadataBuilder& TableMetadataBuilder::AddSortOrder(
     std::shared_ptr<SortOrder> order) {
-  throw IcebergError(std::format("{} not implemented", __FUNCTION__));
+  int32_t new_order_id = impl_->reuseOrCreateNewSortOrderId(*order);
+
+  if (impl_->sort_orders_by_id.find(new_order_id) != 
impl_->sort_orders_by_id.end()) {
+    // update last_added_order_id if the order was added in this set of 
changes (since it
+    // is now the last)
+    bool is_new_order = false;
+    for (const auto& change : impl_->changes) {
+      auto* add_sort_order = dynamic_cast<table::AddSortOrder*>(change.get());
+      if (add_sort_order && add_sort_order->sort_order()->order_id() == 
new_order_id) {
+        is_new_order = true;
+        break;
+      }
+    }

Review Comment:
   ```suggestion
       bool is_new_order =
           impl_->last_added_order_id.has_value() &&
           std::ranges::find_if(impl_->changes, [new_order_id](const auto& 
change) {
             auto* add_sort_order = 
dynamic_cast<table::AddSortOrder*>(change.get());
             return add_sort_order &&
                    add_sort_order->sort_order()->order_id() == new_order_id;
           }) != impl_->changes.cend();
   ```



##########
src/iceberg/test/table_metadata_builder_test.cc:
##########
@@ -124,17 +140,121 @@ TEST(TableMetadataBuilderTest, AssignUUID) {
   EXPECT_EQ(metadata->table_uuid, "TEST-UUID-ABCD");  // Original case 
preserved
 }
 
+// Test AddSortOrder
+TEST(TableMetadataBuilderTest, AddSortOrderBasic) {
+  auto base = CreateBaseMetadata();
+  auto builder = TableMetadataBuilder::BuildFrom(base.get());
+  auto schema = CreateTestSchema();
+
+  // 1. Add unsorted - should reuse existing unsorted order
+  builder->AddSortOrder(SortOrder::Unsorted());
+  ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build());
+  ASSERT_EQ(metadata->sort_orders.size(), 1);
+  EXPECT_TRUE(metadata->sort_orders[0]->is_unsorted());
+
+  // 2. Add basic sort order
+  builder = TableMetadataBuilder::BuildFrom(base.get());
+  SortField field1(1, Transform::Identity(), SortDirection::kAscending,
+                   NullOrder::kFirst);
+  ICEBERG_UNWRAP_OR_FAIL(auto order1,
+                         SortOrder::Make(*schema, 1, 
std::vector<SortField>{field1}));
+  builder->AddSortOrder(std::move(order1));
+  ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build());
+  ASSERT_EQ(metadata->sort_orders.size(), 2);
+  EXPECT_EQ(metadata->sort_orders[1]->order_id(), 1);
+
+  // 3. Add duplicate - should be idempotent
+  builder = TableMetadataBuilder::BuildFrom(base.get());
+  ICEBERG_UNWRAP_OR_FAIL(auto order2,
+                         SortOrder::Make(*schema, 1, 
std::vector<SortField>{field1}));
+  ICEBERG_UNWRAP_OR_FAIL(auto order3,
+                         SortOrder::Make(*schema, 1, 
std::vector<SortField>{field1}));
+  builder->AddSortOrder(std::move(order2));
+  builder->AddSortOrder(std::move(order3));  // Duplicate
+  ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build());
+  ASSERT_EQ(metadata->sort_orders.size(), 2);  // Only one added
+
+  // 4. Add multiple different orders + verify ID reassignment
+  builder = TableMetadataBuilder::BuildFrom(base.get());
+  SortField field2(2, Transform::Identity(), SortDirection::kDescending,
+                   NullOrder::kLast);
+  // User provides ID=99, Builder should reassign to ID=1
+  ICEBERG_UNWRAP_OR_FAIL(auto order4,
+                         SortOrder::Make(*schema, 99, 
std::vector<SortField>{field1}));
+  ICEBERG_UNWRAP_OR_FAIL(
+      auto order5, SortOrder::Make(*schema, 2, std::vector<SortField>{field1, 
field2}));
+  builder->AddSortOrder(std::move(order4));
+  builder->AddSortOrder(std::move(order5));
+  ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build());
+  ASSERT_EQ(metadata->sort_orders.size(), 3);
+  EXPECT_EQ(metadata->sort_orders[1]->order_id(), 1);  // Reassigned from 99
+  EXPECT_EQ(metadata->sort_orders[2]->order_id(), 2);
+}
+
+TEST(TableMetadataBuilderTest, AddSortOrderInvalid) {
+  auto base = CreateBaseMetadata();
+  auto schema = CreateTestSchema();
+
+  // 1. Invalid field ID
+  auto builder = TableMetadataBuilder::BuildFrom(base.get());
+  SortField invalid_field(999, Transform::Identity(), 
SortDirection::kAscending,
+                          NullOrder::kFirst);
+  ICEBERG_UNWRAP_OR_FAIL(auto order1,
+                         SortOrder::Make(1, 
std::vector<SortField>{invalid_field}));
+  builder->AddSortOrder(std::move(order1));
+  ASSERT_THAT(builder->Build(), IsError(ErrorKind::kCommitFailed));
+  ASSERT_THAT(builder->Build(),
+              HasErrorMessage("Cannot find source column for sort field"));
+
+  // 2. Invalid transform (Day transform on string type)
+  builder = TableMetadataBuilder::BuildFrom(base.get());
+  SortField invalid_transform(2, Transform::Day(), SortDirection::kAscending,
+                              NullOrder::kFirst);
+  ICEBERG_UNWRAP_OR_FAIL(auto order2,
+                         SortOrder::Make(1, 
std::vector<SortField>{invalid_transform}));
+  builder->AddSortOrder(std::move(order2));
+  ASSERT_THAT(builder->Build(), IsError(ErrorKind::kCommitFailed));
+  ASSERT_THAT(builder->Build(), HasErrorMessage("Invalid source type"));
+
+  // 3. Without schema
+  builder = TableMetadataBuilder::BuildFromEmpty(2);
+  builder->AssignUUID("test-uuid");
+  SortField field(1, Transform::Identity(), SortDirection::kAscending, 
NullOrder::kFirst);
+  ICEBERG_UNWRAP_OR_FAIL(auto order3,
+                         SortOrder::Make(*schema, 1, 
std::vector<SortField>{field}));
+  builder->AddSortOrder(std::move(order3));
+  ASSERT_THAT(builder->Build(), IsError(ErrorKind::kCommitFailed));
+  ASSERT_THAT(builder->Build(), HasErrorMessage("Cannot find current schema"));
+}
+
 // Test applying TableUpdate to builder
 TEST(TableMetadataBuilderTest, ApplyUpdate) {
+  // Use base metadata with schema for all update tests
+  auto base = CreateBaseMetadata();
+  auto builder = TableMetadataBuilder::BuildFrom(base.get());
+
   // Apply AssignUUID update
-  auto builder = TableMetadataBuilder::BuildFromEmpty(2);
-  table::AssignUUID update("apply-uuid");
-  update.ApplyTo(*builder);
-  // TODO(Li Feiyang): Add more update and `apply` once other build methods are
-  // implemented
+  table::AssignUUID uuid_update("apply-uuid");
+  uuid_update.ApplyTo(*builder);
+
+  // Apply AddSortOrder update

Review Comment:
   I'd suggest splitting this into individual cases.



##########
src/iceberg/table_metadata.cc:
##########
@@ -239,7 +250,38 @@ struct TableMetadataBuilder::Impl {
 
   // Constructor from existing metadata
   explicit Impl(const TableMetadata* base_metadata)
-      : base(base_metadata), metadata(*base_metadata) {}
+      : base(base_metadata), metadata(*base_metadata) {
+    // Initialize index maps from base metadata
+    for (const auto& schema : metadata.schemas) {
+      if (schema->schema_id().has_value()) {
+        schemas_by_id.emplace(schema->schema_id().value(), schema);
+      }
+    }
+
+    for (const auto& spec : metadata.partition_specs) {
+      specs_by_id.emplace(spec->spec_id(), spec);
+    }
+
+    for (const auto& order : metadata.sort_orders) {
+      sort_orders_by_id.emplace(order->order_id(), order);
+    }
+  }
+
+  int32_t reuseOrCreateNewSortOrderId(const SortOrder& new_order) {

Review Comment:
   ```suggestion
     int32_t ReuseOrCreateNewSortOrderId(const SortOrder& new_order) {
   ```



##########
src/iceberg/table_metadata.cc:
##########
@@ -361,7 +403,68 @@ TableMetadataBuilder& 
TableMetadataBuilder::SetDefaultSortOrder(int32_t order_id
 
 TableMetadataBuilder& TableMetadataBuilder::AddSortOrder(
     std::shared_ptr<SortOrder> order) {
-  throw IcebergError(std::format("{} not implemented", __FUNCTION__));
+  int32_t new_order_id = impl_->reuseOrCreateNewSortOrderId(*order);
+
+  if (impl_->sort_orders_by_id.find(new_order_id) != 
impl_->sort_orders_by_id.end()) {
+    // update last_added_order_id if the order was added in this set of 
changes (since it
+    // is now the last)
+    bool is_new_order = false;
+    for (const auto& change : impl_->changes) {
+      auto* add_sort_order = dynamic_cast<table::AddSortOrder*>(change.get());
+      if (add_sort_order && add_sort_order->sort_order()->order_id() == 
new_order_id) {
+        is_new_order = true;
+        break;
+      }
+    }
+    impl_->last_added_order_id =
+        is_new_order ? std::make_optional(new_order_id) : std::nullopt;
+    return *this;
+  }
+
+  // Get current schema for validation
+  auto schema_result = impl_->metadata.Schema();
+  if (!schema_result) {
+    impl_->errors.emplace_back(
+        ErrorKind::kInvalidArgument,
+        std::format("Cannot find current schema: {}", 
schema_result.error().message));
+    return *this;
+  }
+
+  auto schema = schema_result.value();
+
+  // Validate the sort order against the schema
+  auto validate_status = order->Validate(*schema);
+  if (!validate_status) {
+    impl_->errors.emplace_back(
+        ErrorKind::kInvalidArgument,
+        std::format("Sort order validation failed: {}", 
validate_status.error().message));
+    return *this;
+  }
+
+  std::shared_ptr<SortOrder> new_order;
+  if (order->is_unsorted()) {
+    new_order = SortOrder::Unsorted();
+  } else {
+    // TODO(Li Feiyang): rebuild the sort order using new column ids 
(freshSortOrder)
+    // For now, create a new sort order with the provided fields
+    auto sort_order_result = SortOrder::Make(
+        *schema, new_order_id,
+        std::vector<SortField>(order->fields().begin(), 
order->fields().end()));
+    if (!sort_order_result) {
+      impl_->errors.emplace_back(ErrorKind::kInvalidArgument,
+                                 std::format("Failed to create sort order: {}",
+                                             
sort_order_result.error().message));
+      return *this;

Review Comment:
   ```suggestion
       // Unlike freshSortOrder from Java impl, we don't use field name from 
old bound
       // schema to rebuild the sort order.
       BUILDER_ASSIGN_OR_RETURN(
           new_order,
           SortOrder::Make(new_order_id, 
std::vector<SortField>(order->fields().begin(),
                                                                
order->fields().end())));
   ```



##########
src/iceberg/test/table_update_test.cc:
##########
@@ -86,4 +100,23 @@ TEST(TableUpdateTest, AssignUUIDGenerateRequirements) {
   EXPECT_TRUE(empty_uuid_reqs.empty());
 }
 
+TEST(TableUpdateTest, AddSortOrderGenerateRequirements) {
+  auto schema = CreateTestSchema();
+  SortField sort_field(1, Transform::Identity(), SortDirection::kAscending,
+                       NullOrder::kFirst);
+  ICEBERG_UNWRAP_OR_FAIL(auto sort_order_unique,
+                         SortOrder::Make(*schema, 1, 
std::vector<SortField>{sort_field}));
+  auto sort_order = std::shared_ptr<SortOrder>(std::move(sort_order_unique));

Review Comment:
   ```suggestion
     ICEBERG_UNWRAP_OR_FAIL(std::shared_ptr<SortOrder> sort_order,
                            SortOrder::Make(*schema, 1, 
std::vector<SortField>{sort_field}));
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to