evindj commented on code in PR #542: URL: https://github.com/apache/iceberg-cpp/pull/542#discussion_r2744455666
########## src/iceberg/transaction.h: ########## @@ -105,6 +105,9 @@ class ICEBERG_EXPORT Transaction : public std::enable_shared_from_this<Transacti /// and tags) and commit the changes. Result<std::shared_ptr<UpdateSnapshotReference>> NewUpdateSnapshotReference(); + /// \brief Create a new SnapshotManager to manage snapshots. Review Comment: Can you be a bit more descriptive here especially how it differs from ```NewUpdateSnapshotReference()``` I am a little confuse as to why the transaction would expose both since ```SnapshotManager``` seems to be a wrapper on top of ```UpdateSnapshotReference``` ########## src/iceberg/update/snapshot_manager.h: ########## @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <memory> +#include <string> + +#include "iceberg/iceberg_export.h" +#include "iceberg/result.h" +#include "iceberg/snapshot.h" +#include "iceberg/type_fwd.h" +#include "iceberg/update/pending_update.h" +#include "iceberg/util/timepoint.h" + +namespace iceberg { + +/// \brief API for managing snapshots and snapshot references. +/// +/// Allows rolling table data back to a state at an older snapshot, cherry-picking +/// snapshots, and managing branches and tags. +class ICEBERG_EXPORT SnapshotManager : public PendingUpdate { + public: + /// \brief Create a SnapshotManager for a table. + /// + /// \param table_name The name of the table + /// \param table The table to manage snapshots for + /// \return A new SnapshotManager instance, or an error if the table doesn't exist + static Result<std::shared_ptr<SnapshotManager>> Make(const std::string& table_name, + std::shared_ptr<Table> table); + + /// \brief Create a SnapshotManager from an existing transaction. + /// + /// \param transaction The transaction to use + /// \return A new SnapshotManager instance + static Result<std::shared_ptr<SnapshotManager>> Make( + std::shared_ptr<Transaction> transaction); + + ~SnapshotManager() override; + + // TODO(xxx): is this correct? + Kind kind() const final { return Kind::kUpdateSnapshotReference; } Review Comment: According to https://github.com/apache/iceberg-cpp/blob/main/src/iceberg/update/update_snapshot_reference.h this looks correct. But would still probably want to wait for someone who knows more about this space. ########## src/iceberg/update/snapshot_manager.cc: ########## @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/update/snapshot_manager.h" + +#include <memory> +#include <string> + +#include "iceberg/result.h" +#include "iceberg/snapshot.h" +#include "iceberg/table.h" +#include "iceberg/table_metadata.h" +#include "iceberg/transaction.h" +#include "iceberg/update/fast_append.h" +#include "iceberg/update/set_snapshot.h" +#include "iceberg/update/update_snapshot_reference.h" +#include "iceberg/util/macros.h" + +namespace iceberg { + +Result<std::shared_ptr<SnapshotManager>> SnapshotManager::Make( + const std::string& table_name, std::shared_ptr<Table> table) { + if (table == nullptr) { + return InvalidArgument("Table cannot be null"); + } + if (table->metadata() == nullptr) { + return InvalidArgument("Cannot manage snapshots: table {} does not exist", + table_name); + } + // Create a transaction first + ICEBERG_ASSIGN_OR_RAISE(auto transaction, + Transaction::Make(table, Transaction::Kind::kUpdate, + /*auto_commit=*/false)); + auto manager = std::shared_ptr<SnapshotManager>( + new SnapshotManager(std::move(transaction), /*is_external=*/false)); + return manager; +} + +Result<std::shared_ptr<SnapshotManager>> SnapshotManager::Make( + std::shared_ptr<Transaction> transaction) { + if (transaction == nullptr) { + return InvalidArgument("Invalid input transaction: null"); + } + return std::shared_ptr<SnapshotManager>( + new SnapshotManager(std::move(transaction), /*is_external=*/true)); +} + +SnapshotManager::SnapshotManager(std::shared_ptr<Transaction> transaction, + bool is_external) + : PendingUpdate(transaction), is_external_transaction_(is_external) {} + +SnapshotManager::~SnapshotManager() = default; + +SnapshotManager& SnapshotManager::Cherrypick(int64_t snapshot_id) { + ICEBERG_BUILDER_RETURN_IF_ERROR(CommitIfRefUpdatesExist()); + // TODO(anyone): Implement cherrypick operation Review Comment: Is there an issue for this? ########## src/iceberg/test/snapshot_manager_test.cc: ########## @@ -0,0 +1,494 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/update/snapshot_manager.h" + +#include <gmock/gmock.h> +#include <gtest/gtest.h> + +#include "iceberg/result.h" +#include "iceberg/snapshot.h" +#include "iceberg/test/matchers.h" +#include "iceberg/test/update_test_base.h" +#include "iceberg/transaction.h" +#include "iceberg/update/fast_append.h" + +namespace iceberg { + +class SnapshotManagerTest : public UpdateTestBase { + protected: + // These snapshot IDs correspond to the snapshots in the TableMetadataV2Valid.json + static constexpr int64_t kOldestSnapshotId = 3051729675574597004; + static constexpr int64_t kCurrentSnapshotId = 3055729675574597004; Review Comment: since the metadata file is read in the base test setup, will it make sense to extract these in variables instead of copy pasta? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
