wgtmac commented on code in PR #257:
URL: https://github.com/apache/iceberg-cpp/pull/257#discussion_r2429216468


##########
src/iceberg/table_metadata.h:
##########
@@ -29,6 +29,7 @@
 #include <vector>
 
 #include "iceberg/iceberg_export.h"
+#include "iceberg/metadata_update.h"

Review Comment:
   Use forward declaration and remove this?



##########
src/iceberg/table_metadata.h:
##########
@@ -144,6 +145,203 @@ ICEBERG_EXPORT std::string ToString(const 
SnapshotLogEntry& entry);
 /// \brief Returns a string representation of a MetadataLogEntry
 ICEBERG_EXPORT std::string ToString(const MetadataLogEntry& entry);
 
+/// \brief Builder class for constructing TableMetadata objects
+///
+/// This builder provides a fluent interface for creating and modifying table 
metadata.
+/// It supports both creating new tables and building from existing metadata.
+///
+/// Each modification method generates a corresponding MetadataUpdate that is 
tracked
+/// in a changes list. This allows the builder to maintain a complete history 
of all
+/// modifications made to the table metadata, which is important for tracking 
table
+/// evolution and for serialization purposes.
+class ICEBERG_EXPORT TableMetadataBuilder {
+ public:
+  /// \brief Create a builder for a new table
+  ///
+  /// \param format_version The format version for the table
+  /// \return A new TableMetadataBuilder instance
+  static TableMetadataBuilder BuildFromEmpty(
+      int8_t format_version = TableMetadata::kDefaultTableFormatVersion);
+
+  /// \brief Create a builder from existing table metadata
+  ///
+  /// \param base The base table metadata to build from
+  /// \return A new TableMetadataBuilder instance initialized with base 
metadata
+  static TableMetadataBuilder BuildFrom(const std::shared_ptr<const 
TableMetadata>& base);
+
+  /// \brief Assign a UUID to the table
+  ///
+  /// If no UUID is provided, a random UUID will be generated.
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& AssignUUID();
+
+  /// \brief Assign a specific UUID to the table
+  ///
+  /// \param uuid The UUID string to assign
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& AssignUUID(const std::string& uuid);
+
+  /// \brief Upgrade the format version of the table
+  ///
+  /// \param new_format_version The new format version (must be >= current 
version)
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& UpgradeFormatVersion(int8_t new_format_version);
+
+  /// \brief Set the current schema for the table
+  ///
+  /// \param schema The schema to set as current
+  /// \param new_last_column_id The highest column ID in the schema
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& SetCurrentSchema(std::shared_ptr<iceberg::Schema> 
schema,

Review Comment:
   ```suggestion
     TableMetadataBuilder& SetCurrentSchema(std::shared_ptr<Schema> schema,
   ```



##########
src/iceberg/table_metadata.h:
##########
@@ -144,6 +145,203 @@ ICEBERG_EXPORT std::string ToString(const 
SnapshotLogEntry& entry);
 /// \brief Returns a string representation of a MetadataLogEntry
 ICEBERG_EXPORT std::string ToString(const MetadataLogEntry& entry);
 
+/// \brief Builder class for constructing TableMetadata objects
+///
+/// This builder provides a fluent interface for creating and modifying table 
metadata.
+/// It supports both creating new tables and building from existing metadata.
+///
+/// Each modification method generates a corresponding MetadataUpdate that is 
tracked
+/// in a changes list. This allows the builder to maintain a complete history 
of all
+/// modifications made to the table metadata, which is important for tracking 
table
+/// evolution and for serialization purposes.
+class ICEBERG_EXPORT TableMetadataBuilder {
+ public:
+  /// \brief Create a builder for a new table
+  ///
+  /// \param format_version The format version for the table
+  /// \return A new TableMetadataBuilder instance
+  static TableMetadataBuilder BuildFromEmpty(

Review Comment:
   Should we return `std::unique_ptr<TableMetadataBuilder>`? I'm also fine with 
the current one.



##########
src/iceberg/table_metadata.h:
##########
@@ -144,6 +145,203 @@ ICEBERG_EXPORT std::string ToString(const 
SnapshotLogEntry& entry);
 /// \brief Returns a string representation of a MetadataLogEntry
 ICEBERG_EXPORT std::string ToString(const MetadataLogEntry& entry);
 
+/// \brief Builder class for constructing TableMetadata objects
+///
+/// This builder provides a fluent interface for creating and modifying table 
metadata.
+/// It supports both creating new tables and building from existing metadata.
+///
+/// Each modification method generates a corresponding MetadataUpdate that is 
tracked
+/// in a changes list. This allows the builder to maintain a complete history 
of all
+/// modifications made to the table metadata, which is important for tracking 
table
+/// evolution and for serialization purposes.
+class ICEBERG_EXPORT TableMetadataBuilder {
+ public:
+  /// \brief Create a builder for a new table
+  ///
+  /// \param format_version The format version for the table
+  /// \return A new TableMetadataBuilder instance
+  static TableMetadataBuilder BuildFromEmpty(
+      int8_t format_version = TableMetadata::kDefaultTableFormatVersion);
+
+  /// \brief Create a builder from existing table metadata
+  ///
+  /// \param base The base table metadata to build from
+  /// \return A new TableMetadataBuilder instance initialized with base 
metadata
+  static TableMetadataBuilder BuildFrom(const std::shared_ptr<const 
TableMetadata>& base);
+
+  /// \brief Assign a UUID to the table
+  ///
+  /// If no UUID is provided, a random UUID will be generated.
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& AssignUUID();
+
+  /// \brief Assign a specific UUID to the table
+  ///
+  /// \param uuid The UUID string to assign
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& AssignUUID(const std::string& uuid);

Review Comment:
   ```suggestion
     TableMetadataBuilder& AssignUUID(std::string_view uuid);
   ```



##########
src/iceberg/table_metadata.h:
##########
@@ -144,6 +145,203 @@ ICEBERG_EXPORT std::string ToString(const 
SnapshotLogEntry& entry);
 /// \brief Returns a string representation of a MetadataLogEntry
 ICEBERG_EXPORT std::string ToString(const MetadataLogEntry& entry);
 
+/// \brief Builder class for constructing TableMetadata objects
+///
+/// This builder provides a fluent interface for creating and modifying table 
metadata.
+/// It supports both creating new tables and building from existing metadata.
+///
+/// Each modification method generates a corresponding MetadataUpdate that is 
tracked
+/// in a changes list. This allows the builder to maintain a complete history 
of all
+/// modifications made to the table metadata, which is important for tracking 
table
+/// evolution and for serialization purposes.
+class ICEBERG_EXPORT TableMetadataBuilder {
+ public:
+  /// \brief Create a builder for a new table
+  ///
+  /// \param format_version The format version for the table
+  /// \return A new TableMetadataBuilder instance
+  static TableMetadataBuilder BuildFromEmpty(
+      int8_t format_version = TableMetadata::kDefaultTableFormatVersion);
+
+  /// \brief Create a builder from existing table metadata
+  ///
+  /// \param base The base table metadata to build from
+  /// \return A new TableMetadataBuilder instance initialized with base 
metadata
+  static TableMetadataBuilder BuildFrom(const std::shared_ptr<const 
TableMetadata>& base);
+
+  /// \brief Assign a UUID to the table
+  ///
+  /// If no UUID is provided, a random UUID will be generated.
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& AssignUUID();

Review Comment:
   Any specific reason to choose this subset of functions? Should we add all 
Java equivalents to not miss any?



##########
src/iceberg/table_metadata.h:
##########
@@ -144,6 +145,203 @@ ICEBERG_EXPORT std::string ToString(const 
SnapshotLogEntry& entry);
 /// \brief Returns a string representation of a MetadataLogEntry
 ICEBERG_EXPORT std::string ToString(const MetadataLogEntry& entry);
 
+/// \brief Builder class for constructing TableMetadata objects
+///
+/// This builder provides a fluent interface for creating and modifying table 
metadata.
+/// It supports both creating new tables and building from existing metadata.
+///
+/// Each modification method generates a corresponding MetadataUpdate that is 
tracked
+/// in a changes list. This allows the builder to maintain a complete history 
of all
+/// modifications made to the table metadata, which is important for tracking 
table
+/// evolution and for serialization purposes.
+class ICEBERG_EXPORT TableMetadataBuilder {
+ public:
+  /// \brief Create a builder for a new table
+  ///
+  /// \param format_version The format version for the table
+  /// \return A new TableMetadataBuilder instance
+  static TableMetadataBuilder BuildFromEmpty(
+      int8_t format_version = TableMetadata::kDefaultTableFormatVersion);
+
+  /// \brief Create a builder from existing table metadata
+  ///
+  /// \param base The base table metadata to build from
+  /// \return A new TableMetadataBuilder instance initialized with base 
metadata
+  static TableMetadataBuilder BuildFrom(const std::shared_ptr<const 
TableMetadata>& base);
+
+  /// \brief Assign a UUID to the table
+  ///
+  /// If no UUID is provided, a random UUID will be generated.
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& AssignUUID();
+
+  /// \brief Assign a specific UUID to the table
+  ///
+  /// \param uuid The UUID string to assign
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& AssignUUID(const std::string& uuid);
+
+  /// \brief Upgrade the format version of the table
+  ///
+  /// \param new_format_version The new format version (must be >= current 
version)
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& UpgradeFormatVersion(int8_t new_format_version);
+
+  /// \brief Set the current schema for the table
+  ///
+  /// \param schema The schema to set as current
+  /// \param new_last_column_id The highest column ID in the schema
+  /// \return Reference to this builder for method chaining
+  TableMetadataBuilder& SetCurrentSchema(std::shared_ptr<iceberg::Schema> 
schema,

Review Comment:
   Same for below to remove redundant namespace prefix.



##########
src/iceberg/table_metadata.cc:
##########
@@ -196,4 +197,229 @@ Status TableMetadataUtil::Write(FileIO& io, const 
std::string& location,
   return io.WriteFile(location, json_string);
 }
 
+// TableMetadataBuilder implementation
+
+struct TableMetadataBuilder::Impl {
+  // Base metadata (if building from existing metadata)
+  std::shared_ptr<const TableMetadata> base;

Review Comment:
   Is it better to make it `const TableMetadata*` so it can accept broader 
types?



##########
src/iceberg/table_metadata.cc:
##########
@@ -196,4 +197,229 @@ Status TableMetadataUtil::Write(FileIO& io, const 
std::string& location,
   return io.WriteFile(location, json_string);
 }
 
+// TableMetadataBuilder implementation
+
+struct TableMetadataBuilder::Impl {
+  // Base metadata (if building from existing metadata)
+  std::shared_ptr<const TableMetadata> base;
+
+  // Mutable fields that will be used to build the final TableMetadata
+  int8_t format_version;
+  std::string table_uuid;
+  std::string location;
+  int64_t last_sequence_number;
+  TimePointMs last_updated_ms;
+  int32_t last_column_id;
+  std::vector<std::shared_ptr<Schema>> schemas;
+  std::optional<int32_t> current_schema_id;
+  std::vector<std::shared_ptr<PartitionSpec>> partition_specs;
+  int32_t default_spec_id;
+  int32_t last_partition_id;
+  std::unordered_map<std::string, std::string> properties;
+  int64_t current_snapshot_id;
+  std::vector<std::shared_ptr<Snapshot>> snapshots;
+  std::vector<SnapshotLogEntry> snapshot_log;
+  std::vector<MetadataLogEntry> metadata_log;
+  std::vector<std::shared_ptr<SortOrder>> sort_orders;
+  int32_t default_sort_order_id;
+  std::unordered_map<std::string, std::shared_ptr<SnapshotRef>> refs;
+  std::vector<std::shared_ptr<StatisticsFile>> statistics;
+  std::vector<std::shared_ptr<PartitionStatisticsFile>> partition_statistics;
+  int64_t next_row_id;
+
+  // List of changes (MetadataUpdate objects)
+  std::vector<std::unique_ptr<MetadataUpdate>> changes;
+
+  explicit Impl(int8_t fmt_version)
+      : format_version(fmt_version),
+        last_sequence_number(TableMetadata::kInitialSequenceNumber),

Review Comment:
   Should we use in-place initialization?



##########
src/iceberg/metadata_update.h:
##########
@@ -0,0 +1,431 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/metadata_update.h
+/// Metadata update operations for Iceberg tables.
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/snapshot.h"
+#include "iceberg/type_fwd.h"
+
+namespace iceberg {
+
+class TableMetadataBuilder;
+class UpdateRequirementsContext;
+
+/// \brief Base class for metadata update operations
+///
+/// Represents a change to table metadata. Each concrete subclass
+/// represents a specific type of update operation.
+class ICEBERG_EXPORT MetadataUpdate {
+ public:
+  virtual ~MetadataUpdate() = default;
+
+  /// \brief Clone this metadata update
+  virtual std::unique_ptr<MetadataUpdate> Clone() const = 0;
+
+  /// \brief Apply this update to a TableMetadataBuilder
+  ///
+  /// This method modifies the builder by applying the update operation
+  /// it represents. Each subclass implements this to apply its specific
+  /// type of update.
+  ///
+  /// \param builder The builder to apply this update to
+  virtual void ApplyTo(TableMetadataBuilder& builder) const = 0;
+
+  /// \brief Generate update requirements for this metadata update
+  ///
+  /// This method generates the appropriate UpdateRequirement instances
+  /// that must be validated before this update can be applied. The context
+  /// provides information about the base metadata and operation mode.
+  ///
+  /// \param context The context containing base metadata and operation state
+  virtual void GenerateRequirements(UpdateRequirementsContext& context) const 
= 0;
+};
+
+/// \brief Represents an assignment of a UUID to the table
+class ICEBERG_EXPORT AssignUUID : public MetadataUpdate {
+ public:
+  explicit AssignUUID(std::string uuid) : uuid_(std::move(uuid)) {}
+
+  const std::string& uuid() const { return uuid_; }
+
+  std::unique_ptr<MetadataUpdate> Clone() const override {
+    return std::make_unique<AssignUUID>(uuid_);
+  }
+
+  void ApplyTo(TableMetadataBuilder& builder) const override;
+
+  void GenerateRequirements(UpdateRequirementsContext& context) const override;
+
+ private:
+  std::string uuid_;
+};
+
+/// \brief Represents an upgrade of the table format version
+class ICEBERG_EXPORT UpgradeFormatVersion : public MetadataUpdate {
+ public:
+  explicit UpgradeFormatVersion(int8_t format_version)
+      : format_version_(format_version) {}
+
+  int8_t format_version() const { return format_version_; }
+
+  std::unique_ptr<MetadataUpdate> Clone() const override {
+    return std::make_unique<UpgradeFormatVersion>(format_version_);
+  }
+
+  void ApplyTo(TableMetadataBuilder& builder) const override;
+
+  void GenerateRequirements(UpdateRequirementsContext& context) const override;
+
+ private:
+  int8_t format_version_;
+};
+
+/// \brief Represents adding a new schema to the table
+class ICEBERG_EXPORT AddSchema : public MetadataUpdate {
+ public:
+  explicit AddSchema(std::shared_ptr<iceberg::Schema> schema, int32_t 
last_column_id)

Review Comment:
   ```suggestion
     explicit AddSchema(std::shared_ptr<Schema> schema, int32_t last_column_id)
   ```
   
   Remove redundant namespace prefix. Same for below.



##########
src/iceberg/table_metadata.cc:
##########
@@ -196,4 +197,229 @@ Status TableMetadataUtil::Write(FileIO& io, const 
std::string& location,
   return io.WriteFile(location, json_string);
 }
 
+// TableMetadataBuilder implementation
+
+struct TableMetadataBuilder::Impl {
+  // Base metadata (if building from existing metadata)
+  std::shared_ptr<const TableMetadata> base;
+
+  // Mutable fields that will be used to build the final TableMetadata
+  int8_t format_version;

Review Comment:
   Should we just use `std::unique_ptr<TableMetadata>` and set everything to it?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to