wgtmac commented on code in PR #257:
URL: https://github.com/apache/iceberg-cpp/pull/257#discussion_r2431252167


##########
src/iceberg/table_metadata.cc:
##########
@@ -196,4 +198,222 @@ Status TableMetadataUtil::Write(FileIO& io, const 
std::string& location,
   return io.WriteFile(location, json_string);
 }
 
+// TableMetadataBuilder implementation
+
+struct TableMetadataBuilder::Impl {
+  // Base metadata (if building from existing metadata)
+  const TableMetadata* base{nullptr};
+
+  // Mutable fields that will be used to build the final TableMetadata
+  int8_t format_version;
+  std::string table_uuid;
+  std::string location;
+  int64_t last_sequence_number{TableMetadata::kInitialSequenceNumber};
+  TimePointMs last_updated_ms{std::chrono::milliseconds(0)};
+  int32_t last_column_id{0};
+  std::vector<std::shared_ptr<Schema>> schemas;
+  std::optional<int32_t> current_schema_id;
+  std::vector<std::shared_ptr<PartitionSpec>> partition_specs;
+  int32_t default_spec_id{0};

Review Comment:
   Do we have predefined constants for this and below? For example, 
`PartitionSpec::kInitialSpecId`?



##########
src/iceberg/table_metadata.h:
##########
@@ -144,6 +144,202 @@ ICEBERG_EXPORT std::string ToString(const 
SnapshotLogEntry& entry);
 /// \brief Returns a string representation of a MetadataLogEntry
 ICEBERG_EXPORT std::string ToString(const MetadataLogEntry& entry);
 
+/// \brief Builder class for constructing TableMetadata objects
+///
+/// This builder provides a fluent interface for creating and modifying table 
metadata.
+/// It supports both creating new tables and building from existing metadata.
+///
+/// Each modification method generates a corresponding MetadataUpdate that is 
tracked
+/// in a changes list. This allows the builder to maintain a complete history 
of all
+/// modifications made to the table metadata, which is important for tracking 
table
+/// evolution and for serialization purposes.
+class ICEBERG_EXPORT TableMetadataBuilder {
+ public:
+  /// \brief Create a builder for a new table
+  ///
+  /// \param format_version The format version for the table
+  /// \return A new TableMetadataBuilder instance
+  static std::unique_ptr<TableMetadataBuilder> BuildFromEmpty(
+      int8_t format_version = TableMetadata::kDefaultTableFormatVersion);
+
+  /// \brief Create a builder from existing table metadata
+  ///
+  /// \param base The base table metadata to build from
+  /// \return A new TableMetadataBuilder instance initialized with base 
metadata
+  static std::unique_ptr<TableMetadataBuilder> BuildFrom(const TableMetadata* 
base);
+
+  /// \brief Assign a UUID to the table
+  ///
+  /// If no UUID is provided, a random UUID will be generated.
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& AssignUUID();
+
+  /// \brief Assign a specific UUID to the table
+  ///
+  /// \param uuid The UUID string to assign
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& AssignUUID(std::string_view uuid);
+
+  /// \brief Upgrade the format version of the table
+  ///
+  /// \param new_format_version The new format version (must be >= current 
version)
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& UpgradeFormatVersion(int8_t new_format_version);
+
+  /// \brief Set the current schema for the table
+  ///
+  /// \param schema The schema to set as current
+  /// \param new_last_column_id The highest column ID in the schema
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& SetCurrentSchema(std::shared_ptr<Schema> schema,
+                                         int32_t new_last_column_id);
+
+  /// \brief Set the current schema by schema ID
+  ///
+  /// \param schema_id The ID of the schema to set as current
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& SetCurrentSchema(int32_t schema_id);
+
+  /// \brief Add a schema to the table
+  ///
+  /// \param schema The schema to add
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& AddSchema(std::shared_ptr<Schema> schema);
+
+  /// \brief Set the default partition spec for the table
+  ///
+  /// \param spec The partition spec to set as default
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& SetDefaultPartitionSpec(std::shared_ptr<PartitionSpec> 
spec);

Review Comment:
   We cannot throw from these setters. How do you plan to deal with invalid 
inputs?
   
   1. Change the setters to return 
`Result<std::reference_wrapper<TableMetadataBuilder>>`, but this makes return 
value chaining unappealing.
   2. Change the setters to return `Status`.
   3. Postpone any error reporting. Add a state to record most recent error and 
report it in the `Build` call.
   4. ...



##########
src/iceberg/metadata_update.h:
##########
@@ -0,0 +1,431 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/metadata_update.h
+/// Metadata update operations for Iceberg tables.
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/snapshot.h"
+#include "iceberg/type_fwd.h"
+
+namespace iceberg {
+
+class TableMetadataBuilder;
+class UpdateRequirementsContext;

Review Comment:
   ```suggestion
   ```
   
   They are already declared by type_fwd.h



##########
src/iceberg/update_requirement.h:
##########
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/update_requirement.h
+/// Update requirements for Iceberg table operations.
+///
+/// Update requirements are conditions that must be satisfied before
+/// applying metadata updates to a table. They are used for optimistic
+/// concurrency control in table operations.
+
+#include <memory>
+#include <optional>
+#include <string>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/result.h"
+#include "iceberg/type_fwd.h"
+
+namespace iceberg {
+
+/// \brief Base class for update requirement operations
+///
+/// Represents a requirement that must be validated before applying
+/// metadata updates to a table. Each concrete subclass represents
+/// a specific type of requirement check.
+class ICEBERG_EXPORT UpdateRequirement {
+ public:
+  virtual ~UpdateRequirement() = default;
+
+  /// \brief Clone this update requirement
+  virtual std::unique_ptr<UpdateRequirement> Clone() const = 0;

Review Comment:
   Is it actually used? If not, remove it for now?



##########
src/iceberg/update_requirement.cc:
##########
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/update_requirement.h"
+
+#include "iceberg/snapshot.h"
+#include "iceberg/table_metadata.h"
+#include "iceberg/util/string_util.h"
+
+namespace iceberg {
+
+Status AssertTableDoesNotExist::Validate(const TableMetadata* base) const {

Review Comment:
   I haven't reviewed all the function definitions yet but I strongly recommend 
to leave implementation out in this PR. We can add individual followup PRs to 
make it easier to review. Let's focus on the interface design here.



##########
src/iceberg/table_metadata.cc:
##########
@@ -196,4 +198,222 @@ Status TableMetadataUtil::Write(FileIO& io, const 
std::string& location,
   return io.WriteFile(location, json_string);
 }
 
+// TableMetadataBuilder implementation
+
+struct TableMetadataBuilder::Impl {
+  // Base metadata (if building from existing metadata)
+  const TableMetadata* base{nullptr};
+
+  // Mutable fields that will be used to build the final TableMetadata
+  int8_t format_version;
+  std::string table_uuid;
+  std::string location;
+  int64_t last_sequence_number{TableMetadata::kInitialSequenceNumber};
+  TimePointMs last_updated_ms{std::chrono::milliseconds(0)};
+  int32_t last_column_id{0};
+  std::vector<std::shared_ptr<Schema>> schemas;
+  std::optional<int32_t> current_schema_id;
+  std::vector<std::shared_ptr<PartitionSpec>> partition_specs;
+  int32_t default_spec_id{0};
+  int32_t last_partition_id{0};
+  std::unordered_map<std::string, std::string> properties;
+  int64_t current_snapshot_id{-1};
+  std::vector<std::shared_ptr<Snapshot>> snapshots;
+  std::vector<SnapshotLogEntry> snapshot_log;
+  std::vector<MetadataLogEntry> metadata_log;
+  std::vector<std::shared_ptr<SortOrder>> sort_orders;
+  int32_t default_sort_order_id{0};
+  std::unordered_map<std::string, std::shared_ptr<SnapshotRef>> refs;
+  std::vector<std::shared_ptr<StatisticsFile>> statistics;
+  std::vector<std::shared_ptr<PartitionStatisticsFile>> partition_statistics;
+  int64_t next_row_id{TableMetadata::kInitialRowId};
+
+  // List of changes (MetadataUpdate objects)
+  std::vector<std::unique_ptr<MetadataUpdate>> changes;
+
+  explicit Impl(int8_t fmt_version) : format_version(fmt_version) {}
+
+  explicit Impl(const TableMetadata* base_metadata)
+      : base(base_metadata),

Review Comment:
   What if `base_metadata` is nullptr?



##########
src/iceberg/update_requirements.h:
##########
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/update_requirements.h
+/// Factory for generating update requirements from metadata updates.
+///
+/// This utility class generates the appropriate UpdateRequirement instances
+/// based on a list of MetadataUpdate operations. The requirements are used
+/// for optimistic concurrency control when committing table changes.
+
+#include <memory>
+#include <vector>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/type_fwd.h"
+#include "iceberg/update_requirement.h"
+
+namespace iceberg {
+
+/// \brief Context for generating update requirements
+///
+/// This context is passed to each MetadataUpdate's GenerateRequirements method
+/// and maintains state about what requirements have already been added to 
avoid
+/// duplicates.
+class ICEBERG_EXPORT UpdateRequirementsContext {
+ public:
+  /// \brief Construct a context for requirement generation
+  ///
+  /// \param base The base table metadata (may be nullptr for table creation)
+  /// \param is_replace Whether this is a replace operation (more permissive)
+  UpdateRequirementsContext(const TableMetadata* base, bool is_replace)
+      : base_(base), is_replace_(is_replace) {}
+
+  // Delete copy operations (contains unique_ptr members)
+  UpdateRequirementsContext(const UpdateRequirementsContext&) = delete;
+  UpdateRequirementsContext& operator=(const UpdateRequirementsContext&) = 
delete;
+
+  // Enable move construction only (assignment deleted due to const members)
+  UpdateRequirementsContext(UpdateRequirementsContext&&) noexcept = default;
+
+  /// \brief Add a requirement to the list
+  void AddRequirement(std::unique_ptr<UpdateRequirement> requirement) {
+    requirements_.push_back(std::move(requirement));
+  }
+
+  /// \brief Get the base table metadata
+  const TableMetadata* base() const { return base_; }
+
+  /// \brief Check if this is a replace operation
+  bool is_replace() const { return is_replace_; }
+
+  /// \brief Build and return the list of requirements
+  std::vector<std::unique_ptr<UpdateRequirement>> Build() {

Review Comment:
   How do we handle errors? Should we return a `Result` wrapper?



##########
src/iceberg/metadata_update.h:
##########
@@ -0,0 +1,431 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/metadata_update.h
+/// Metadata update operations for Iceberg tables.
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/snapshot.h"
+#include "iceberg/type_fwd.h"
+
+namespace iceberg {
+
+class TableMetadataBuilder;
+class UpdateRequirementsContext;
+
+/// \brief Base class for metadata update operations
+///
+/// Represents a change to table metadata. Each concrete subclass
+/// represents a specific type of update operation.
+class ICEBERG_EXPORT MetadataUpdate {
+ public:
+  virtual ~MetadataUpdate() = default;
+
+  /// \brief Clone this metadata update
+  virtual std::unique_ptr<MetadataUpdate> Clone() const = 0;
+
+  /// \brief Apply this update to a TableMetadataBuilder
+  ///
+  /// This method modifies the builder by applying the update operation
+  /// it represents. Each subclass implements this to apply its specific
+  /// type of update.
+  ///
+  /// \param builder The builder to apply this update to
+  virtual void ApplyTo(TableMetadataBuilder& builder) const = 0;
+
+  /// \brief Generate update requirements for this metadata update
+  ///
+  /// This method generates the appropriate UpdateRequirement instances
+  /// that must be validated before this update can be applied. The context
+  /// provides information about the base metadata and operation mode.
+  ///
+  /// \param context The context containing base metadata and operation state
+  virtual void GenerateRequirements(UpdateRequirementsContext& context) const 
= 0;

Review Comment:
   What about renaming `UpdateRequirementsContext` to `MetadataUpdateContext` 
or `TableMetadataUpdateContext` (same as iceberg-python) ?



##########
src/iceberg/table_metadata.h:
##########
@@ -144,6 +145,203 @@ ICEBERG_EXPORT std::string ToString(const 
SnapshotLogEntry& entry);
 /// \brief Returns a string representation of a MetadataLogEntry
 ICEBERG_EXPORT std::string ToString(const MetadataLogEntry& entry);
 
+/// \brief Builder class for constructing TableMetadata objects
+///
+/// This builder provides a fluent interface for creating and modifying table 
metadata.
+/// It supports both creating new tables and building from existing metadata.
+///
+/// Each modification method generates a corresponding MetadataUpdate that is 
tracked
+/// in a changes list. This allows the builder to maintain a complete history 
of all
+/// modifications made to the table metadata, which is important for tracking 
table
+/// evolution and for serialization purposes.
+class ICEBERG_EXPORT TableMetadataBuilder {
+ public:
+  /// \brief Create a builder for a new table
+  ///
+  /// \param format_version The format version for the table
+  /// \return A new TableMetadataBuilder instance
+  static TableMetadataBuilder BuildFromEmpty(
+      int8_t format_version = TableMetadata::kDefaultTableFormatVersion);
+
+  /// \brief Create a builder from existing table metadata
+  ///
+  /// \param base The base table metadata to build from
+  /// \return A new TableMetadataBuilder instance initialized with base 
metadata
+  static TableMetadataBuilder BuildFrom(const std::shared_ptr<const 
TableMetadata>& base);
+
+  /// \brief Assign a UUID to the table
+  ///
+  /// If no UUID is provided, a random UUID will be generated.
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& AssignUUID();

Review Comment:
   I just took a quick glance and it seems that `Builder 
withMetadataLocation(String newMetadataLocation)` is missing?



##########
src/iceberg/metadata_update.h:
##########
@@ -0,0 +1,431 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/metadata_update.h
+/// Metadata update operations for Iceberg tables.
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/snapshot.h"
+#include "iceberg/type_fwd.h"
+
+namespace iceberg {
+
+class TableMetadataBuilder;
+class UpdateRequirementsContext;
+
+/// \brief Base class for metadata update operations
+///
+/// Represents a change to table metadata. Each concrete subclass
+/// represents a specific type of update operation.
+class ICEBERG_EXPORT MetadataUpdate {
+ public:
+  virtual ~MetadataUpdate() = default;
+
+  /// \brief Clone this metadata update
+  virtual std::unique_ptr<MetadataUpdate> Clone() const = 0;
+
+  /// \brief Apply this update to a TableMetadataBuilder
+  ///
+  /// This method modifies the builder by applying the update operation
+  /// it represents. Each subclass implements this to apply its specific
+  /// type of update.
+  ///
+  /// \param builder The builder to apply this update to
+  virtual void ApplyTo(TableMetadataBuilder& builder) const = 0;
+
+  /// \brief Generate update requirements for this metadata update
+  ///
+  /// This method generates the appropriate UpdateRequirement instances
+  /// that must be validated before this update can be applied. The context
+  /// provides information about the base metadata and operation mode.
+  ///
+  /// \param context The context containing base metadata and operation state
+  virtual void GenerateRequirements(UpdateRequirementsContext& context) const 
= 0;

Review Comment:
   Perhaps like below:
   
   ```
   virtual void AddTo(MetadataUpdateContext& context) const = 0;
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to