Copilot commented on code in PR #86:
URL: https://github.com/apache/paimon-cpp/pull/86#discussion_r3418729414


##########
src/paimon/core/manifest/manifest_file_meta.cpp:
##########
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "paimon/core/manifest/manifest_file_meta.h"
+
+#include "arrow/type_fwd.h"

Review Comment:
   This translation unit includes `arrow/type_fwd.h` but uses Arrow 
construction helpers like `arrow::struct_`, `arrow::field`, and concrete type 
factories (`utf8()`, `int64()`, etc.). Those symbols are typically not provided 
by the forward-declare header and can fail to compile depending on include 
order. Include `arrow/api.h` (or the specific Arrow headers that define these 
helpers) in this `.cpp`.



##########
src/paimon/core/manifest/manifest_file_meta_serializer.cpp:
##########
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "paimon/core/manifest/manifest_file_meta_serializer.h"
+
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "fmt/format.h"
+#include "paimon/common/data/binary_string.h"
+#include "paimon/common/data/internal_row.h"
+#include "paimon/core/stats/simple_stats.h"
+#include "paimon/status.h"
+
+namespace paimon {
+
+Result<BinaryRow> ManifestFileMetaSerializer::ToRow(const ManifestFileMeta& 
record) const {
+    BinaryRow row(GetDataType()->num_fields());
+    BinaryRowWriter writer(&row, 32 * 1024, pool_.get());
+
+    writer.WriteInt(0, GetVersion());
+    writer.WriteString(1, BinaryString::FromString(record.FileName(), 
pool_.get()));
+    writer.WriteLong(2, record.FileSize());
+    writer.WriteLong(3, record.NumAddedFiles());
+    writer.WriteLong(4, record.NumDeletedFiles());
+    writer.WriteRow(5, record.PartitionStats().ToRow());
+    writer.WriteLong(6, record.SchemaId());
+
+    auto min_bucket = record.MinBucket();
+    if (!min_bucket) {
+        writer.SetNullAt(7);
+    } else {
+        writer.WriteInt(7, min_bucket.value());
+    }
+
+    auto max_bucket = record.MaxBucket();
+    if (!max_bucket) {
+        writer.SetNullAt(8);
+    } else {
+        writer.WriteInt(8, max_bucket.value());
+    }
+
+    auto min_level = record.MinLevel();
+    if (!min_level) {
+        writer.SetNullAt(9);
+    } else {
+        writer.WriteInt(9, min_level.value());
+    }
+
+    auto max_level = record.MaxLevel();
+    if (!max_level) {
+        writer.SetNullAt(10);
+    } else {
+        writer.WriteInt(10, max_level.value());
+    }
+    auto min_row_id = record.MinRowId();
+    if (!min_row_id) {
+        writer.SetNullAt(11);
+    } else {
+        writer.WriteInt(11, min_row_id.value());
+    }
+
+    auto max_row_id = record.MaxRowId();
+    if (!max_row_id) {
+        writer.SetNullAt(12);
+    } else {
+        writer.WriteInt(12, max_row_id.value());
+    }
+    writer.Complete();
+    return row;
+}
+
+Result<ManifestFileMeta> ManifestFileMetaSerializer::ConvertFrom(int32_t 
version,
+                                                                 const 
InternalRow& row) const {
+    if (version != VERSION_2) {
+        if (version == VERSION_1) {
+            return Status::Invalid(
+                fmt::format("The current version {} is not compatible with the 
"
+                            "version {}, please recreate the table.",
+                            GetVersion(), version));
+        }
+        return Status::Invalid(fmt::format("Unsupported version: {}", 
version));
+    }
+    auto file_name = row.GetString(0);
+    auto file_size = row.GetLong(1);
+    auto num_added_files = row.GetLong(2);
+    auto num_deleted_files = row.GetLong(3);
+    auto partition_stats_row = row.GetRow(4, 3);

Review Comment:
   `GetRow(4, 3)` hardcodes the nested row arity for `SimpleStats`. This is a 
brittle “magic number” and will break if `SimpleStats`’ schema changes. Prefer 
deriving the field count from the stats type/serializer (e.g., 
`SimpleStats::DataType()->num_fields()` or an equivalent `NumFields()` helper) 
so the deserializer stays aligned with the schema.



##########
src/paimon/core/manifest/manifest_file_meta_serializer.cpp:
##########
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "paimon/core/manifest/manifest_file_meta_serializer.h"
+
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "fmt/format.h"
+#include "paimon/common/data/binary_string.h"
+#include "paimon/common/data/internal_row.h"
+#include "paimon/core/stats/simple_stats.h"
+#include "paimon/status.h"
+
+namespace paimon {
+
+Result<BinaryRow> ManifestFileMetaSerializer::ToRow(const ManifestFileMeta& 
record) const {
+    BinaryRow row(GetDataType()->num_fields());
+    BinaryRowWriter writer(&row, 32 * 1024, pool_.get());
+
+    writer.WriteInt(0, GetVersion());
+    writer.WriteString(1, BinaryString::FromString(record.FileName(), 
pool_.get()));
+    writer.WriteLong(2, record.FileSize());
+    writer.WriteLong(3, record.NumAddedFiles());
+    writer.WriteLong(4, record.NumDeletedFiles());
+    writer.WriteRow(5, record.PartitionStats().ToRow());
+    writer.WriteLong(6, record.SchemaId());
+
+    auto min_bucket = record.MinBucket();
+    if (!min_bucket) {
+        writer.SetNullAt(7);
+    } else {
+        writer.WriteInt(7, min_bucket.value());
+    }
+
+    auto max_bucket = record.MaxBucket();
+    if (!max_bucket) {
+        writer.SetNullAt(8);
+    } else {
+        writer.WriteInt(8, max_bucket.value());
+    }
+
+    auto min_level = record.MinLevel();
+    if (!min_level) {
+        writer.SetNullAt(9);
+    } else {
+        writer.WriteInt(9, min_level.value());
+    }
+
+    auto max_level = record.MaxLevel();
+    if (!max_level) {
+        writer.SetNullAt(10);
+    } else {
+        writer.WriteInt(10, max_level.value());
+    }
+    auto min_row_id = record.MinRowId();
+    if (!min_row_id) {
+        writer.SetNullAt(11);
+    } else {
+        writer.WriteInt(11, min_row_id.value());
+    }
+
+    auto max_row_id = record.MaxRowId();
+    if (!max_row_id) {
+        writer.SetNullAt(12);
+    } else {
+        writer.WriteInt(12, max_row_id.value());
+    }

Review Comment:
   `min_row_id`/`max_row_id` are `int64_t` in `ManifestFileMeta` (and read back 
via `row.GetLong(...)`), but serialization currently uses 
`writer.WriteInt(...)` (32-bit). This will truncate values and also makes the 
writer/reader types inconsistent. Use the 64-bit write API for these fields 
(e.g., `WriteLong`) to match the Arrow schema (`int64`) and the reader 
(`GetLong`).



##########
src/paimon/core/manifest/manifest_entry_serializer.cpp:
##########
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "paimon/core/manifest/manifest_entry_serializer.h"
+
+#include <string>
+#include <utility>
+
+#include "fmt/format.h"
+#include "paimon/common/data/binary_row.h"
+#include "paimon/common/data/internal_row.h"
+#include "paimon/common/utils/serialization_utils.h"
+#include "paimon/core/manifest/file_kind.h"
+#include "paimon/status.h"
+
+namespace paimon {
+class MemoryPool;
+struct DataFileMeta;
+
+Result<ManifestEntry> ManifestEntrySerializer::ConvertFrom(int32_t version,
+                                                           const InternalRow& 
row) const {
+    if (version != VERSION_2) {
+        if (version == VERSION_1) {
+            return Status::Invalid(
+                fmt::format("The current version {} is not compatible with the 
version {}, "
+                            "please recreate the table.",
+                            GetVersion(), version));
+        }
+        return Status::Invalid("Unsupported version", std::to_string(version));
+    }
+    auto kind = row.GetByte(0);
+    PAIMON_ASSIGN_OR_RAISE(FileKind file_kind, FileKind::FromByteValue(kind));
+    auto partition_bytes = row.GetBinary(1);
+    PAIMON_ASSIGN_OR_RAISE(BinaryRow partition,
+                           
SerializationUtils::DeserializeBinaryRow(partition_bytes));
+    auto bucket = row.GetInt(2);
+    auto total_buckets = row.GetInt(3);
+    auto file = row.GetRow(4, data_file_meta_serializer_.NumFields());
+    if (!file) {
+        return Status::Invalid("ManifestEntry convert from row failed, with 
null DataFileMeta");
+    }
+    PAIMON_ASSIGN_OR_RAISE(std::shared_ptr<DataFileMeta> meta,
+                           data_file_meta_serializer_.FromRow(*file))
+    return ManifestEntry(file_kind, partition, bucket, total_buckets, meta);

Review Comment:
   Missing semicolon after the `PAIMON_ASSIGN_OR_RAISE(...)` invocation causes 
a compilation error. Add the terminating `;` so the return statement is a 
separate statement.



##########
src/paimon/core/manifest/file_source.h:
##########
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+#include "fmt/format.h"
+#include "paimon/result.h"
+#include "paimon/status.h"
+
+namespace paimon {
+/// The Source of a file.
+class FileSource {
+ public:
+    /// The file from new input.
+    static const FileSource& Append();
+    /// The file from compaction.
+    static const FileSource& Compact();
+
+    int8_t ToByteValue() const {
+        return value_;
+    }
+
+    static Result<FileSource> FromByteValue(int8_t value) {
+        switch (value) {
+            case 0:
+                return Append();
+            case 1:
+                return Compact();
+            default:
+                return Status::Invalid(
+                    fmt::format("Unsupported byte value {} for value kind.", 
value));

Review Comment:
   The error message says “value kind” which doesn’t match the type 
(`FileSource`). This looks like a copy/paste and makes debugging harder. 
Consider changing it to consistently reference `FileSource` (e.g., “Unsupported 
byte value {} for file source.”).



##########
src/paimon/core/manifest/manifest_entry.cpp:
##########
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "paimon/core/manifest/manifest_entry.h"
+
+#include "arrow/type_fwd.h"
+
+namespace arrow {
+class DataType;
+}  // namespace arrow
+
+namespace paimon {
+const std::shared_ptr<arrow::DataType>& ManifestEntry::DataType() {
+    static std::shared_ptr<arrow::DataType> data_type =
+        arrow::struct_({arrow::field("_KIND", arrow::int8(), 
/*nullable=*/false),
+                        arrow::field("_PARTITION", arrow::binary(), 
/*nullable=*/false),
+                        arrow::field("_BUCKET", arrow::int32(), 
/*nullable=*/false),
+                        arrow::field("_TOTAL_BUCKETS", arrow::int32(), 
/*nullable=*/false),
+                        arrow::field("_FILE", DataFileMeta::DataType(), 
/*nullable=*/false)});

Review Comment:
   Same issue as `manifest_file_meta.cpp`: `arrow/type_fwd.h` is unlikely to 
define `arrow::struct_`, `arrow::field`, and the type factory helpers. Add the 
appropriate Arrow include (commonly `arrow/api.h`) to avoid compile failures 
due to missing declarations/definitions.



##########
src/paimon/core/manifest/file_entry.h:
##########
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <variant>
+#include <vector>
+
+#include "fmt/format.h"
+#include "paimon/common/data/binary_row.h"
+#include "paimon/common/utils/linked_hash_map.h"
+#include "paimon/common/utils/murmurhash_utils.h"
+#include "paimon/core/manifest/file_kind.h"
+#include "paimon/status.h"
+
+namespace paimon {
+
+/// Entry representing a file.
+class FileEntry {
+ public:
+    /// The same `Identifier` indicates that the `ManifestEntry` refers to the 
same data
+    /// file.
+    struct Identifier {
+        Identifier(const BinaryRow& _partition, int32_t _bucket, int32_t 
_level,
+                   const std::string& _file_name, const 
std::optional<std::string>& _external_path)
+            : partition(_partition),
+              bucket(_bucket),
+              level(_level),
+              file_name(_file_name),
+              external_path(_external_path) {}
+
+        bool operator==(const Identifier& other) const {
+            return partition == other.partition && bucket == other.bucket && 
level == other.level &&
+                   file_name == other.file_name && external_path == 
other.external_path;
+        }
+
+        size_t HashCode() const {
+            if (hash_ == static_cast<size_t>(-1)) {
+                hash_ = partition.HashCode();
+                hash_ =
+                    MurmurHashUtils::HashUnsafeBytes(reinterpret_cast<const 
void*>(&bucket),
+                                                     /*offset=*/0, 
sizeof(bucket), /*seed=*/hash_);
+                hash_ = 
MurmurHashUtils::HashUnsafeBytes(reinterpret_cast<const void*>(&level),
+                                                         /*offset=*/0, 
sizeof(level),
+                                                         /*seed=*/hash_);
+                hash_ = MurmurHashUtils::HashUnsafeBytes(
+                    reinterpret_cast<const void*>(file_name.data()),
+                    /*offset=*/0, file_name.size(),
+                    /*seed=*/hash_);
+                if (external_path) {
+                    hash_ = MurmurHashUtils::HashUnsafeBytes(
+                        reinterpret_cast<const 
void*>(external_path.value().data()),
+                        /*offset=*/0, external_path.value().size(),
+                        /*seed=*/hash_);
+                }
+            }
+            return hash_;
+        }
+
+        std::string ToString() const {
+            std::string external_path_str =
+                external_path == std::nullopt ? "null" : external_path.value();
+            return fmt::format("{{{}, {}, {}, {}, {}}}", partition.ToString(), 
bucket, level,
+                               file_name, external_path_str);
+        }
+
+        BinaryRow partition;
+        int32_t bucket;
+        int32_t level;
+        std::string file_name;
+        std::optional<std::string> external_path;
+
+     private:
+        mutable size_t hash_ = -1;
+    };
+
+ public:
+    virtual ~FileEntry() = default;
+    virtual const FileKind& Kind() const = 0;
+    virtual const BinaryRow& Partition() const = 0;
+    virtual int32_t Bucket() const = 0;
+    virtual int32_t Level() const = 0;
+    virtual const std::string& FileName() const = 0;
+    virtual const std::optional<std::string>& ExternalPath() const = 0;
+    virtual Identifier CreateIdentifier() const = 0;
+    virtual const BinaryRow& MinKey() const = 0;
+    virtual const BinaryRow& MaxKey() const = 0;
+
+    template <typename T>
+    static Status MergeEntries(const std::vector<T>& unmerged_entries,
+                               std::vector<T>* merged_entries) {
+        LinkedHashMap<Identifier, T> merged_map;
+        PAIMON_RETURN_NOT_OK(MergeEntries(unmerged_entries, &merged_map));
+        for (const auto& [identifier, entry] : merged_map) {
+            merged_entries->emplace_back(entry);
+        }
+        return Status::OK();
+    }
+
+    template <typename T>
+    static Status MergeEntries(const std::vector<T>& unmerged_entries,
+                               LinkedHashMap<Identifier, T>* merged_map_ptr) {
+        auto& merged_map = *merged_map_ptr;
+        for (const auto& entry : unmerged_entries) {
+            Identifier identifier = entry.CreateIdentifier();
+            const auto& kind = entry.Kind();
+            auto iter = merged_map.find(identifier);
+            if (kind == FileKind::Add()) {
+                if (iter != merged_map.end()) {
+                    return Status::Invalid(fmt::format(
+                        "Trying to add file {} which is already added.", 
identifier.ToString()));
+                }
+                merged_map.insert(identifier, entry);
+            } else if (kind == FileKind::Delete()) {
+                // each dataFile will only be added once and deleted once,
+                // if we know that it is added before then both add and delete 
entry can be
+                // removed because there won't be further operations on this 
file,
+                // otherwise we have to keep the delete entry because the add 
entry must be
+                // in the previous manifest files
+                if (iter != merged_map.end()) {
+                    merged_map.erase(identifier);
+                } else {
+                    merged_map.insert(identifier, entry);
+                }
+            } else {
+                return Status::Invalid("Unknown value kind ",
+                                       
std::to_string(static_cast<int32_t>(kind.ToByteValue())));

Review Comment:
   The error message says “value kind”, but the domain here is `FileKind` / 
file-entry kind. Updating the wording to “Unknown file kind” (and formatting it 
as a single coherent message, consistent with other errors in this PR) would 
make failures clearer and easier to search for.



##########
src/paimon/core/manifest/manifest_entry.cpp:
##########
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "paimon/core/manifest/manifest_entry.h"
+
+#include "arrow/type_fwd.h"

Review Comment:
   Same issue as `manifest_file_meta.cpp`: `arrow/type_fwd.h` is unlikely to 
define `arrow::struct_`, `arrow::field`, and the type factory helpers. Add the 
appropriate Arrow include (commonly `arrow/api.h`) to avoid compile failures 
due to missing declarations/definitions.



##########
src/paimon/core/manifest/manifest_file_meta.cpp:
##########
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "paimon/core/manifest/manifest_file_meta.h"
+
+#include "arrow/type_fwd.h"
+#include "fmt/format.h"
+
+namespace arrow {
+class DataType;
+}  // namespace arrow
+
+namespace paimon {
+
+ManifestFileMeta::ManifestFileMeta(
+    const std::string& file_name, int64_t file_size, int64_t num_added_files,
+    int64_t num_deleted_files, const SimpleStats& partition_stats, int64_t 
schema_id,
+    const std::optional<int32_t>& min_bucket, const std::optional<int32_t>& 
max_bucket,
+    const std::optional<int32_t>& min_level, const std::optional<int32_t>& 
max_level,
+    const std::optional<int64_t>& min_row_id, const std::optional<int64_t>& 
max_row_id)
+    : file_name_(file_name),
+      file_size_(file_size),
+      num_added_files_(num_added_files),
+      num_deleted_files_(num_deleted_files),
+      partition_stats_(partition_stats),
+      schema_id_(schema_id),
+      min_bucket_(min_bucket),
+      max_bucket_(max_bucket),
+      min_level_(min_level),
+      max_level_(max_level),
+      min_row_id_(min_row_id),
+      max_row_id_(max_row_id) {}
+
+bool ManifestFileMeta::operator==(const ManifestFileMeta& other) const {
+    if (this == &other) {
+        return true;
+    }
+    return file_name_ == other.file_name_ && file_size_ == other.file_size_ &&
+           num_added_files_ == other.num_added_files_ &&
+           num_deleted_files_ == other.num_deleted_files_ &&
+           partition_stats_ == other.partition_stats_ && schema_id_ == 
other.schema_id_ &&
+           min_bucket_ == other.min_bucket_ && max_bucket_ == 
other.max_bucket_ &&
+           min_level_ == other.min_level_ && max_level_ == other.max_level_ &&
+           min_row_id_ == other.min_row_id_ && max_row_id_ == 
other.max_row_id_;
+}
+
+const std::shared_ptr<arrow::DataType>& ManifestFileMeta::DataType() {
+    static std::shared_ptr<arrow::DataType> data_type = arrow::struct_({
+        arrow::field("_FILE_NAME", arrow::utf8(), /*nullable=*/false),
+        arrow::field("_FILE_SIZE", arrow::int64(), /*nullable=*/false),
+        arrow::field("_NUM_ADDED_FILES", arrow::int64(), /*nullable=*/false),
+        arrow::field("_NUM_DELETED_FILES", arrow::int64(), /*nullable=*/false),
+        arrow::field("_PARTITION_STATS", SimpleStats::DataType(), 
/*nullable=*/false),
+        arrow::field("_SCHEMA_ID", arrow::int64(), /*nullable=*/false),
+        arrow::field("_MIN_BUCKET", arrow::int32(), /*nullable=*/true),
+        arrow::field("_MAX_BUCKET", arrow::int32(), /*nullable=*/true),
+        arrow::field("_MIN_LEVEL", arrow::int32(), /*nullable=*/true),
+        arrow::field("_MAX_LEVEL", arrow::int32(), /*nullable=*/true),
+        arrow::field("_MIN_ROW_ID", arrow::int64(), /*nullable=*/true),
+        arrow::field("_MAX_ROW_ID", arrow::int64(), /*nullable=*/true),
+    });

Review Comment:
   This translation unit includes `arrow/type_fwd.h` but uses Arrow 
construction helpers like `arrow::struct_`, `arrow::field`, and concrete type 
factories (`utf8()`, `int64()`, etc.). Those symbols are typically not provided 
by the forward-declare header and can fail to compile depending on include 
order. Include `arrow/api.h` (or the specific Arrow headers that define these 
helpers) in this `.cpp`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to