wgtmac commented on code in PR #299:
URL: https://github.com/apache/iceberg-cpp/pull/299#discussion_r2508619381
##########
src/iceberg/json_internal.cc:
##########
@@ -853,7 +855,6 @@ Result<std::shared_ptr<Schema>> ParseSchemas(
///
/// \param[in] json The JSON object to parse.
/// \param[in] format_version The format version of the table.
-/// \param[in] current_schema The current schema.
Review Comment:
revert this line
##########
src/iceberg/manifest_adapter.h:
##########
@@ -61,8 +61,10 @@ class ICEBERG_EXPORT ManifestAdapter {
/// Implemented by different versions with version-specific schemas.
class ICEBERG_EXPORT ManifestEntryAdapter : public ManifestAdapter {
public:
- explicit ManifestEntryAdapter(std::shared_ptr<PartitionSpec> partition_spec)
- : partition_spec_(std::move(partition_spec)) {}
+ ManifestEntryAdapter(std::shared_ptr<PartitionSpec> partition_spec,
+ std::shared_ptr<Schema> current_schema_)
+ : partition_spec_(std::move(partition_spec)),
+ current_schema_(std::move(current_schema_)) {}
Review Comment:
```suggestion
ManifestEntryAdapter(std::shared_ptr<PartitionSpec> partition_spec,
std::shared_ptr<Schema> current_schema)
: partition_spec_(std::move(partition_spec)),
current_schema_(std::move(current_schema)) {}
```
##########
src/iceberg/manifest_writer.cc:
##########
@@ -99,9 +101,9 @@ Result<std::unique_ptr<ManifestWriter>>
ManifestWriter::MakeV2Writer(
Result<std::unique_ptr<ManifestWriter>> ManifestWriter::MakeV3Writer(
std::optional<int64_t> snapshot_id, std::optional<int64_t> first_row_id,
std::string_view manifest_location, std::shared_ptr<FileIO> file_io,
- std::shared_ptr<PartitionSpec> partition_spec) {
- auto adapter = std::make_unique<ManifestEntryAdapterV3>(snapshot_id,
first_row_id,
-
std::move(partition_spec));
+ std::shared_ptr<PartitionSpec> partition_spec, std::shared_ptr<Schema>
table_schema) {
Review Comment:
```suggestion
std::shared_ptr<PartitionSpec> partition_spec, std::shared_ptr<Schema>
current_schema) {
```
##########
src/iceberg/partition_spec.h:
##########
@@ -56,24 +58,20 @@ class ICEBERG_EXPORT PartitionSpec : public
util::Formattable {
/// \param fields The partition fields.
/// \param last_assigned_field_id The last assigned field ID. If not
provided, it will
/// be calculated from the fields.
- PartitionSpec(std::shared_ptr<Schema> schema, int32_t spec_id,
- std::vector<PartitionField> fields,
+ PartitionSpec(int32_t spec_id, std::vector<PartitionField> fields,
std::optional<int32_t> last_assigned_field_id = std::nullopt);
/// \brief Get an unsorted partition spec singleton.
static const std::shared_ptr<PartitionSpec>& Unpartitioned();
- /// \brief Get the table schema
- const std::shared_ptr<Schema>& schema() const;
-
/// \brief Get the spec ID.
int32_t spec_id() const;
/// \brief Get a list view of the partition fields.
std::span<const PartitionField> fields() const;
/// \brief Get the partition type.
Review Comment:
```suggestion
/// \brief Get the partition type binding to the input schema.
```
##########
src/iceberg/manifest_writer.h:
##########
@@ -59,32 +59,41 @@ class ICEBERG_EXPORT ManifestWriter {
/// \param manifest_location Path to the manifest file.
/// \param file_io File IO implementation to use.
/// \param partition_spec Partition spec for the manifest.
+ /// \param table_schema Schema containing the source fields referenced by
partition
+ /// spec.
/// \return A Result containing the writer or an error.
static Result<std::unique_ptr<ManifestWriter>> MakeV1Writer(
std::optional<int64_t> snapshot_id, std::string_view manifest_location,
- std::shared_ptr<FileIO> file_io, std::shared_ptr<PartitionSpec>
partition_spec);
+ std::shared_ptr<FileIO> file_io, std::shared_ptr<PartitionSpec>
partition_spec,
+ std::shared_ptr<Schema> table_schema);
Review Comment:
```suggestion
std::shared_ptr<Schema> current_schema);
```
Same for below.
##########
src/iceberg/manifest_adapter.cc:
##########
@@ -152,7 +152,11 @@ Result<std::shared_ptr<StructType>>
ManifestEntryAdapter::GetManifestEntryType()
if (partition_spec_ == nullptr) [[unlikely]] {
return ManifestEntry::TypeFromPartitionType(nullptr);
}
- ICEBERG_ASSIGN_OR_RAISE(auto partition_type,
partition_spec_->PartitionType());
+ std::shared_ptr<StructType> partition_type = nullptr;
+ if (current_schema_ != nullptr) {
Review Comment:
```suggestion
ICEBERG_DCHECK(current_schema_ != nullptr, "Current schema cannot be
null");
```
##########
src/iceberg/json_internal.h:
##########
@@ -173,6 +173,7 @@ ICEBERG_EXPORT Result<std::string> ToJsonString(const
PartitionSpec& partition_s
/// objects. Each `PartitionField` will be parsed using the
`PartitionFieldFromJson`
/// function.
///
+/// \param schema The table schema.
Review Comment:
```suggestion
/// \param schema The current schema.
```
##########
src/iceberg/manifest_writer.h:
##########
@@ -59,32 +59,41 @@ class ICEBERG_EXPORT ManifestWriter {
/// \param manifest_location Path to the manifest file.
/// \param file_io File IO implementation to use.
/// \param partition_spec Partition spec for the manifest.
+ /// \param table_schema Schema containing the source fields referenced by
partition
+ /// spec.
Review Comment:
```suggestion
/// \param current_schema Current table schema.
```
##########
src/iceberg/manifest_writer.cc:
##########
@@ -68,9 +68,10 @@ Result<std::unique_ptr<Writer>> OpenFileWriter(
Result<std::unique_ptr<ManifestWriter>> ManifestWriter::MakeV1Writer(
std::optional<int64_t> snapshot_id, std::string_view manifest_location,
- std::shared_ptr<FileIO> file_io, std::shared_ptr<PartitionSpec>
partition_spec) {
- auto adapter =
- std::make_unique<ManifestEntryAdapterV1>(snapshot_id,
std::move(partition_spec));
+ std::shared_ptr<FileIO> file_io, std::shared_ptr<PartitionSpec>
partition_spec,
+ std::shared_ptr<Schema> current_schema_) {
Review Comment:
ditto
##########
src/iceberg/manifest_writer.cc:
##########
@@ -83,9 +84,10 @@ Result<std::unique_ptr<ManifestWriter>>
ManifestWriter::MakeV1Writer(
Result<std::unique_ptr<ManifestWriter>> ManifestWriter::MakeV2Writer(
std::optional<int64_t> snapshot_id, std::string_view manifest_location,
- std::shared_ptr<FileIO> file_io, std::shared_ptr<PartitionSpec>
partition_spec) {
- auto adapter =
- std::make_unique<ManifestEntryAdapterV2>(snapshot_id,
std::move(partition_spec));
+ std::shared_ptr<FileIO> file_io, std::shared_ptr<PartitionSpec>
partition_spec,
+ std::shared_ptr<Schema> table_schema) {
Review Comment:
```suggestion
std::shared_ptr<Schema> current_schema) {
```
##########
src/iceberg/table_scan.cc:
##########
@@ -269,7 +270,13 @@ Result<std::vector<std::shared_ptr<FileScanTask>>>
DataTableScan::PlanFiles() co
std::vector<std::shared_ptr<FileScanTask>> tasks;
ICEBERG_ASSIGN_OR_RAISE(auto partition_spec,
context_.table_metadata->PartitionSpec());
- auto partition_schema = partition_spec->schema();
+ // auto partition_schema = context_.table_metadata->Schema().value();
+
+ // Get the table schema and partition type
+ ICEBERG_ASSIGN_OR_RAISE(auto current_schema,
context_.table_metadata->Schema());
+ ICEBERG_ASSIGN_OR_RAISE(auto partition_type,
+ partition_spec->PartitionType(*current_schema));
+ auto partition_schema =
std::shared_ptr<StructType>(std::move(partition_type));
Review Comment:
```suggestion
ICEBERG_ASSIGN_OR_RAISE(std::shared_ptr<StructType> partition_schema,
partition_spec->PartitionType(*current_schema));
```
##########
src/iceberg/v1_metadata.cc:
##########
@@ -68,9 +68,13 @@ Result<std::shared_ptr<StructType>>
ManifestEntryAdapterV1::GetManifestEntryType
// Deprecated. Always write a default in v1. Do not write in v2 or v3.
static const SchemaField kBlockSizeInBytes = SchemaField::MakeRequired(
105, "block_size_in_bytes", int64(), "Block size in bytes");
- ICEBERG_ASSIGN_OR_RAISE(auto partition_type,
partition_spec_->PartitionType());
+ std::shared_ptr<StructType> partition_type = nullptr;
+ if (partition_spec_ != nullptr && current_schema_ != nullptr) {
+ ICEBERG_ASSIGN_OR_RAISE(partition_type,
+ partition_spec_->PartitionType(*current_schema_));
+ }
Review Comment:
```suggestion
ICEBERG_ASSIGN_OR_RAISE(auto partition_type,
partition_spec_->PartitionType(*current_schema_));
```
This is wrong since `current_schema_` and `partition_spec_` cannot be null.
##########
src/iceberg/partition_spec.h:
##########
@@ -32,6 +33,7 @@
#include "iceberg/iceberg_export.h"
#include "iceberg/partition_field.h"
#include "iceberg/result.h"
+#include "iceberg/schema.h"
Review Comment:
```suggestion
#include "iceberg/type_fwd.h"
```
##########
src/iceberg/manifest_writer.cc:
##########
@@ -68,9 +68,10 @@ Result<std::unique_ptr<Writer>> OpenFileWriter(
Result<std::unique_ptr<ManifestWriter>> ManifestWriter::MakeV1Writer(
std::optional<int64_t> snapshot_id, std::string_view manifest_location,
- std::shared_ptr<FileIO> file_io, std::shared_ptr<PartitionSpec>
partition_spec) {
- auto adapter =
- std::make_unique<ManifestEntryAdapterV1>(snapshot_id,
std::move(partition_spec));
+ std::shared_ptr<FileIO> file_io, std::shared_ptr<PartitionSpec>
partition_spec,
+ std::shared_ptr<Schema> current_schema_) {
Review Comment:
These `MakeVxWriter` functions should validate inputs like `current_schema`
is not null and returns `InvalidArgument` error.
##########
src/iceberg/json_internal.cc:
##########
@@ -902,8 +898,8 @@ Status ParsePartitionSpecs(const nlohmann::json& json,
int8_t format_version,
std::move(field->transform()));
}
- auto spec = std::make_unique<PartitionSpec>(
- current_schema, PartitionSpec::kInitialSpecId, std::move(fields));
+ auto spec =
Review Comment:
This is not resolved yet
##########
src/iceberg/table_scan.cc:
##########
@@ -269,7 +270,13 @@ Result<std::vector<std::shared_ptr<FileScanTask>>>
DataTableScan::PlanFiles() co
std::vector<std::shared_ptr<FileScanTask>> tasks;
ICEBERG_ASSIGN_OR_RAISE(auto partition_spec,
context_.table_metadata->PartitionSpec());
- auto partition_schema = partition_spec->schema();
+ // auto partition_schema = context_.table_metadata->Schema().value();
+
+ // Get the table schema and partition type
+ ICEBERG_ASSIGN_OR_RAISE(auto current_schema,
context_.table_metadata->Schema());
+ ICEBERG_ASSIGN_OR_RAISE(auto partition_type,
+ partition_spec->PartitionType(*current_schema));
+ auto partition_schema =
std::shared_ptr<StructType>(std::move(partition_type));
Review Comment:
Just notice that you can change line 284 below to
```
ManifestReader::Make(manifest_file, file_io_, std::move(partition_schema)));
```
##########
src/iceberg/manifest_writer.h:
##########
@@ -59,32 +59,41 @@ class ICEBERG_EXPORT ManifestWriter {
/// \param manifest_location Path to the manifest file.
/// \param file_io File IO implementation to use.
/// \param partition_spec Partition spec for the manifest.
+ /// \param table_schema Schema containing the source fields referenced by
partition
+ /// spec.
Review Comment:
The schema is used by manifest writer to serialize it to a json string. So
it is not used only by partition spec.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]