wgtmac commented on code in PR #607: URL: https://github.com/apache/iceberg-cpp/pull/607#discussion_r3408933242
########## src/iceberg/inspect/metadata_table.cc: ########## @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/inspect/metadata_table.h" + +#include <memory> +#include <string> +#include <utility> + +#include "iceberg/file_io.h" +#include "iceberg/inspect/history_table.h" +#include "iceberg/inspect/snapshots_table.h" +#include "iceberg/partition_spec.h" +#include "iceberg/schema.h" +#include "iceberg/schema_field.h" +#include "iceberg/sort_order.h" +#include "iceberg/table_identifier.h" +#include "iceberg/table_metadata.h" +#include "iceberg/table_properties.h" +#include "iceberg/table_scan.h" +#include "iceberg/type.h" +#include "iceberg/type_fwd.h" +#include "iceberg/util/uuid.h" + +namespace iceberg { + +MetadataTable::MetadataTable(std::shared_ptr<Table> source_table, + TableIdentifier identifier) + : StaticTable(identifier, source_table->metadata(), + std::string(source_table->metadata_file_location()), source_table->io(), + source_table->catalog()), + source_table_(std::move(source_table)) { + auto schema = GetSchema(); + if (!schema) { + schema = std::make_shared<Schema>(std::vector<SchemaField>{}, 1); + } + + auto builder = + TableMetadataBuilder::BuildFromEmpty(TableMetadata::kDefaultTableFormatVersion); + auto result = builder->AssignUUID(Uuid::GenerateV4().ToString()) + .SetLocation(std::string(source_table_->location())) + .SetCurrentSchema(schema, schema->schema_id()) + .SetDefaultSortOrder(SortOrder::Unsorted()) + .SetDefaultPartitionSpec(PartitionSpec::Unpartitioned()) + .SetProperties({}) + .Build(); + + if (!result.has_value()) { + // If metadata building fails, keep the original metadata from source_table + return; + } + + std::shared_ptr<TableMetadata> built_metadata = std::move(result.value()); + + metadata_ = built_metadata; + metadata_cache_ = std::make_unique<TableMetadataCache>(metadata_.get()); +} + +MetadataTable::~MetadataTable() = default; + +Status MetadataTable::Refresh() { return source_table_->Refresh(); } + +Result<std::unique_ptr<DataTableScanBuilder>> MetadataTable::NewScan() const { + return NotSupported("TODO: Scanning metadata tables is not yet supported"); +}; + +Result<std::unique_ptr<MetadataTable>> MetadataTableFactory::CreateMetadataTable( + std::shared_ptr<Table> table, MetadataTableType type) { Review Comment: Please validate `table` here. Passing `nullptr` reaches the concrete constructor and crashes on `table->name()` instead of returning `InvalidArgument`. ########## src/iceberg/inspect/metadata_table.cc: ########## @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/inspect/metadata_table.h" + +#include <memory> +#include <string> +#include <utility> + +#include "iceberg/file_io.h" +#include "iceberg/inspect/history_table.h" +#include "iceberg/inspect/snapshots_table.h" +#include "iceberg/partition_spec.h" +#include "iceberg/schema.h" +#include "iceberg/schema_field.h" +#include "iceberg/sort_order.h" +#include "iceberg/table_identifier.h" +#include "iceberg/table_metadata.h" +#include "iceberg/table_properties.h" +#include "iceberg/table_scan.h" +#include "iceberg/type.h" +#include "iceberg/type_fwd.h" +#include "iceberg/util/uuid.h" + +namespace iceberg { + +MetadataTable::MetadataTable(std::shared_ptr<Table> source_table, + TableIdentifier identifier) + : StaticTable(identifier, source_table->metadata(), + std::string(source_table->metadata_file_location()), source_table->io(), + source_table->catalog()), + source_table_(std::move(source_table)) { + auto schema = GetSchema(); Review Comment: Calling `GetSchema()` here does not call the derived override. During base construction, C++ dispatches to `MetadataTable::GetSchema()`, so snapshots/history tables get the empty fallback schema. Please pass the schema into the base constructor or set it after the derived object is built. ########## src/iceberg/inspect/metadata_table.h: ########## @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <memory> +#include <string> + +#include "iceberg/iceberg_export.h" +#include "iceberg/location_provider.h" +#include "iceberg/result.h" +#include "iceberg/sort_order.h" +#include "iceberg/table.h" +#include "iceberg/table_metadata.h" +#include "iceberg/table_scan.h" +#include "iceberg/type_fwd.h" + +namespace iceberg { + +/// \brief The type of metadata table +enum class MetadataTableType { + kSnapshots, + kHistory, +}; + +/// \brief Base class for Iceberg metadata tables +/// +/// Metadata tables expose table metadata as queryable tables with schemas and scan +/// support. They provide read-only access to metadata. +class ICEBERG_EXPORT MetadataTable : public StaticTable { + public: + virtual MetadataTableType type() const noexcept = 0; + + /// \brief Returns the table's metadata file location + std::string_view metadata_file_location() const { + return source_table_->metadata_file_location(); + } + + /// \brief Returns the table's base location + std::string_view location() const { return source_table_->location(); } + + /// \brief Returns the time when this table was last updated + TimePointMs last_updated_ms() const { return source_table_->last_updated_ms(); } + + /// \brief Returns the table's current snapshot, return NotFoundError if not found + Result<std::shared_ptr<Snapshot>> current_snapshot() const { + return source_table_->current_snapshot(); + } + + /// \brief Get the snapshot of this table with the given id + /// + /// \param snapshot_id the ID of the snapshot to get + /// \return the Snapshot with the given id, return NotFoundError if not found + Result<std::shared_ptr<Snapshot>> SnapshotById(int64_t snapshot_id) const { + return source_table_->SnapshotById(snapshot_id); + } + + /// \brief Get the snapshots of this table + const std::vector<std::shared_ptr<Snapshot>>& snapshots() const { + return source_table_->snapshots(); + } + + /// \brief Get the snapshot history of this table + const std::vector<SnapshotLogEntry>& history() const { + return source_table_->history(); + } + + /// \brief Returns the catalog that this table belongs to + const std::shared_ptr<Catalog>& catalog() const { return source_table_->catalog(); } + + /// \brief Returns a LocationProvider for this table + Result<std::unique_ptr<LocationProvider>> location_provider() const { + return source_table_->location_provider(); + } + + /// \brief Refreshing is not supported in metadata tables. + Status Refresh() override; + + /// \brief Create a new table scan builder for this table + /// + /// Once a table scan builder is created, it can be refined to project columns and + /// filter data. + Result<std::unique_ptr<DataTableScanBuilder>> NewScan() const override; + + ~MetadataTable(); + + protected: + explicit MetadataTable(std::shared_ptr<Table> source_table, TableIdentifier identifier); + + /// \brief Returns the schema for this metadata table + /// + /// Subclasses override this method to provide their custom schema during + /// MetadataTable construction. The returned schema is used to initialize + /// the underlying TableMetadata. + /// + /// \return The schema for this metadata table, or nullptr for default schema + virtual std::shared_ptr<Schema> GetSchema() const { return nullptr; } + + std::shared_ptr<Table> source_table_; +}; + +/// \brief Metadata table factory and inspector +/// +/// MetadataTable provides factory methods to create specific metadata tables for +/// inspecting table metadata. Each metadata table exposes a different aspect of the +/// table's metadata as a scannable Iceberg table. +/// +/// Usage: +/// auto metadata_table = ICEBERG_TRY( +/// MetadataTableFactory::CreateMetadataTable( +/// table, MetadataTableType::kSnapshots)); +/// auto scan = ICEBERG_TRY(metadata_table->NewScan()); +/// // ... scan and read metadata table data +class ICEBERG_EXPORT MetadataTableFactory { + public: + /// \brief Create a metadata table from a table + /// + /// \param table The source table + /// \param type The metadata table type to create + /// \return A MetadataTable instance or error status + static Result<std::unique_ptr<MetadataTable>> CreateMetadataTable( Review Comment: Returning `unique_ptr` is unsafe for a `Table` subclass. `Table` uses `enable_shared_from_this()`, and inherited methods like `NewFastAppend()` can throw `bad_weak_ptr`. Metadata tables also need to override all mutating APIs as read-only. ########## src/iceberg/CMakeLists.txt: ########## @@ -41,6 +41,9 @@ set(ICEBERG_SOURCES file_io_registry.cc file_reader.cc file_writer.cc + inspect/history_table.cc + inspect/metadata_table.cc + inspect/snapshots_table.cc Review Comment: These sources are added, but the new `inspect/CMakeLists.txt` is not included below. Please add `add_subdirectory(inspect)`, otherwise CMake install will miss the public headers. ########## src/iceberg/inspect/snapshots_table.cc: ########## @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/inspect/snapshots_table.h" + +#include <memory> +#include <utility> + +#include "iceberg/inspect/metadata_table.h" +#include "iceberg/schema.h" +#include "iceberg/schema_field.h" +#include "iceberg/table_identifier.h" +#include "iceberg/type.h" + +namespace iceberg { + +SnapshotsTable::SnapshotsTable(std::shared_ptr<Table> table) + : MetadataTable(table, CreateName(table->name())) {} + +SnapshotsTable::~SnapshotsTable() = default; + +std::shared_ptr<Schema> SnapshotsTable::GetSchema() const { + return std::make_shared<Schema>( + std::vector<SchemaField>{ + SchemaField::MakeRequired(1, "committed_at", timestamp_tz()), + SchemaField::MakeRequired(2, "snapshot_id", int64()), + SchemaField::MakeRequired(3, "parent_id", int64()), + SchemaField::MakeOptional(4, "operation", int64()), Review Comment: This schema does not match Iceberg. `parent_id` must be optional and `operation` must be a string, not int64. Valid snapshots can have no parent and operations such as `append`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
