This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 95a51a1  test: add more table metadata interop test (#262)
95a51a1 is described below

commit 95a51a11efd130d9691efea3541ed69d3ad73ecc
Author: Gang Wu <[email protected]>
AuthorDate: Fri Oct 17 22:03:39 2025 +0800

    test: add more table metadata interop test (#262)
---
 src/iceberg/table_metadata.h            |   3 +-
 src/iceberg/test/metadata_serde_test.cc | 498 +++++++++++++++++++++++---------
 src/iceberg/test/test_common.cc         |  13 +-
 src/iceberg/test/test_common.h          |   4 +
 4 files changed, 368 insertions(+), 150 deletions(-)

diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h
index 6f7a819..11b17eb 100644
--- a/src/iceberg/table_metadata.h
+++ b/src/iceberg/table_metadata.h
@@ -135,7 +135,8 @@ struct ICEBERG_EXPORT TableMetadata {
   /// \brief Get the snapshot of this table with the given id
   Result<std::shared_ptr<iceberg::Snapshot>> SnapshotById(int64_t snapshot_id) const;
 
-  friend bool operator==(const TableMetadata& lhs, const TableMetadata& rhs);
+  ICEBERG_EXPORT friend bool operator==(const TableMetadata& lhs,
+                                        const TableMetadata& rhs);
 };
 
 /// \brief Returns a string representation of a SnapshotLogEntry
diff --git a/src/iceberg/test/metadata_serde_test.cc b/src/iceberg/test/metadata_serde_test.cc
index 73e5dd3..744a55c 100644
--- a/src/iceberg/test/metadata_serde_test.cc
+++ b/src/iceberg/test/metadata_serde_test.cc
@@ -17,13 +17,10 @@
  * under the License.
  */
 
-#include <filesystem>
-#include <fstream>
 #include <optional>
 #include <string>
 
 #include <gtest/gtest.h>
-#include <nlohmann/json.hpp>
 
 #include "iceberg/partition_field.h"
 #include "iceberg/partition_spec.h"
@@ -32,174 +29,387 @@
 #include "iceberg/snapshot.h"
 #include "iceberg/sort_field.h"
 #include "iceberg/sort_order.h"
+#include "iceberg/statistics_file.h"
 #include "iceberg/table_metadata.h"
 #include "iceberg/transform.h"
 #include "iceberg/type.h"
+#include "matchers.h"
 #include "test_common.h"
 
 namespace iceberg {
 
 namespace {
 
-class MetadataSerdeTest : public ::testing::Test {
- protected:
-  void SetUp() override {}
-};
+void ReadTableMetadataExpectError(const std::string& file_name,
+                                  const std::string& expected_error_substr) {
+  auto result = ReadTableMetadata(file_name);
+  ASSERT_FALSE(result.has_value()) << "Expected parsing to fail for " << file_name;
+  EXPECT_THAT(result, HasErrorMessage(expected_error_substr));
+}
+
+void AssertSchema(const TableMetadata& metadata, const Schema& expected_schema) {
+  auto schema = metadata.Schema();
+  ASSERT_TRUE(schema.has_value());
+  EXPECT_EQ(*(schema.value().get()), expected_schema);
+}
+
+void AssertSchemaById(const TableMetadata& metadata, int32_t schema_id,
+                      const Schema& expected_schema) {
+  auto schema = metadata.SchemaById(schema_id);
+  ASSERT_TRUE(schema.has_value());
+  EXPECT_EQ(*(schema.value().get()), expected_schema);
+}
+
+void AssertPartitionSpec(const TableMetadata& metadata,
+                         const PartitionSpec& expected_spec) {
+  auto partition_spec = metadata.PartitionSpec();
+  ASSERT_TRUE(partition_spec.has_value());
+  EXPECT_EQ(*(partition_spec.value().get()), expected_spec);
+}
+
+void AssertSortOrder(const TableMetadata& metadata,
+                     const SortOrder& expected_sort_order) {
+  auto sort_order = metadata.SortOrder();
+  ASSERT_TRUE(sort_order.has_value());
+  EXPECT_EQ(*(sort_order.value().get()), expected_sort_order);
+}
+
+void AssertSnapshot(const TableMetadata& metadata, const Snapshot& expected_snapshot) {
+  auto snapshot = metadata.Snapshot();
+  ASSERT_TRUE(snapshot.has_value());
+  EXPECT_EQ(*snapshot.value(), expected_snapshot);
+}
+
+void AssertSnapshotById(const TableMetadata& metadata, int64_t snapshot_id,
+                        const Snapshot& expected_snapshot) {
+  auto snapshot = metadata.SnapshotById(snapshot_id);
+  ASSERT_TRUE(snapshot.has_value());
+  EXPECT_EQ(*snapshot.value(), expected_snapshot);
+}
 
 }  // namespace
 
-TEST_F(MetadataSerdeTest, DeserializeV1Valid) {
+TEST(MetadataSerdeTest, DeserializeV1Valid) {
   std::unique_ptr<TableMetadata> metadata;
   ASSERT_NO_FATAL_FAILURE(ReadTableMetadata("TableMetadataV1Valid.json", &metadata));
 
-  EXPECT_EQ(metadata->format_version, 1);
-  EXPECT_EQ(metadata->table_uuid, "d20125c8-7284-442c-9aea-15fee620737c");
-  EXPECT_EQ(metadata->location, "s3://bucket/test/location");
-  EXPECT_EQ(metadata->last_updated_ms.time_since_epoch().count(), 1602638573874);
-  EXPECT_EQ(metadata->last_column_id, 3);
-  EXPECT_EQ(metadata->current_snapshot_id, -1);
-
-  // Compare schema
-  EXPECT_EQ(metadata->current_schema_id, std::nullopt);
-  std::vector<SchemaField> schema_fields;
-  schema_fields.emplace_back(/*field_id=*/1, "x", iceberg::int64(),
-                             /*optional=*/false);
-  schema_fields.emplace_back(/*field_id=*/2, "y", iceberg::int64(),
-                             /*optional=*/false);
-  schema_fields.emplace_back(/*field_id=*/3, "z", iceberg::int64(),
-                             /*optional=*/false);
-  auto expected_schema =
-      std::make_shared<Schema>(schema_fields, /*schema_id=*/std::nullopt);
-  auto schema = metadata->Schema();
-  ASSERT_TRUE(schema.has_value());
-  EXPECT_EQ(*(schema.value().get()), *expected_schema);
-
-  // Compare partition spec
-  std::vector<PartitionField> partition_fields;
-  partition_fields.emplace_back(/*source_id=*/1, /*field_id=*/1000, 
/*name=*/"x",
-                                Transform::Identity());
-  auto expected_spec =
-      std::make_shared<PartitionSpec>(expected_schema, /*spec_id=*/0, 
partition_fields);
-  auto partition_spec = metadata->PartitionSpec();
-  ASSERT_TRUE(partition_spec.has_value());
-  EXPECT_EQ(*(partition_spec.value().get()), *expected_spec);
-  auto snapshot = metadata->Snapshot();
-  ASSERT_FALSE(snapshot.has_value());
+  auto expected_schema = std::make_shared<Schema>(
+      std::vector<SchemaField>{SchemaField::MakeRequired(1, "x", int64()),
+                               SchemaField::MakeRequired(2, "y", int64()),
+                               SchemaField::MakeRequired(3, "z", int64())},
+      /*schema_id=*/std::nullopt);
+
+  auto expected_spec = std::make_shared<PartitionSpec>(
+      expected_schema, /*spec_id=*/0,
+      std::vector<PartitionField>{PartitionField(/*source_id=*/1, 
/*field_id=*/1000, "x",
+                                                 Transform::Identity())});
+
+  TableMetadata expected{
+      .format_version = 1,
+      .table_uuid = "d20125c8-7284-442c-9aea-15fee620737c",
+      .location = "s3://bucket/test/location",
+      .last_sequence_number = 0,
+      .last_updated_ms = TimePointMsFromUnixMs(1602638573874).value(),
+      .last_column_id = 3,
+      .schemas = {expected_schema},
+      .current_schema_id = std::nullopt,
+      .partition_specs = {expected_spec},
+      .default_spec_id = 0,
+      .last_partition_id = 1000,
+      .current_snapshot_id = -1,
+      .sort_orders = {SortOrder::Unsorted()},
+      .default_sort_order_id = 0,
+      .next_row_id = 0,
+  };
+
+  ASSERT_EQ(*metadata, expected);
+  AssertSchema(*metadata, *expected_schema);
+  AssertPartitionSpec(*metadata, *expected_spec);
+  ASSERT_FALSE(metadata->Snapshot().has_value());
 }
 
-TEST_F(MetadataSerdeTest, DeserializeV2Valid) {
+TEST(MetadataSerdeTest, DeserializeV2Valid) {
   std::unique_ptr<TableMetadata> metadata;
   ASSERT_NO_FATAL_FAILURE(ReadTableMetadata("TableMetadataV2Valid.json", &metadata));
 
-  EXPECT_EQ(metadata->format_version, 2);
-  EXPECT_EQ(metadata->table_uuid, "9c12d441-03fe-4693-9a96-a0705ddf69c1");
-  EXPECT_EQ(metadata->location, "s3://bucket/test/location");
-  EXPECT_EQ(metadata->last_updated_ms.time_since_epoch().count(), 
1602638573590);
-  EXPECT_EQ(metadata->last_column_id, 3);
-
-  // Compare schema
-  EXPECT_EQ(metadata->current_schema_id, 1);
-  std::vector<SchemaField> schema_fields;
-  schema_fields.emplace_back(/*field_id=*/1, "x", iceberg::int64(),
-                             /*optional=*/false);
-  schema_fields.emplace_back(/*field_id=*/2, "y", iceberg::int64(),
-                             /*optional=*/false);
-  schema_fields.emplace_back(/*field_id=*/3, "z", iceberg::int64(),
-                             /*optional=*/false);
-  auto expected_schema = std::make_shared<Schema>(schema_fields, 
/*schema_id=*/1);
-  auto schema = metadata->Schema();
-  ASSERT_TRUE(schema.has_value());
-  EXPECT_EQ(*(schema.value().get()), *expected_schema);
-
-  // schema with ID 1
-  auto schema_v1 = metadata->SchemaById(1);
-  ASSERT_TRUE(schema_v1.has_value());
-  EXPECT_EQ(*(schema_v1.value().get()), *expected_schema);
-
-  // schema with ID 0
-  auto expected_schema_v0 = std::make_shared<Schema>(
-      std::vector<SchemaField>{schema_fields.at(0)}, /*schema_id=*/0);
-  auto schema_v0 = metadata->SchemaById(0);
-  ASSERT_TRUE(schema_v0.has_value());
-  EXPECT_EQ(*(schema_v0.value().get()), *expected_schema_v0);
-
-  // Compare partition spec
-  EXPECT_EQ(metadata->default_spec_id, 0);
-  std::vector<PartitionField> partition_fields;
-  partition_fields.emplace_back(/*source_id=*/1, /*field_id=*/1000, 
/*name=*/"x",
-                                Transform::Identity());
+  auto expected_schema_1 = std::make_shared<Schema>(
+      std::vector<SchemaField>{SchemaField(/*field_id=*/1, "x", 
iceberg::int64(),
+                                           /*optional=*/false)},
+      /*schema_id=*/0);
+
+  auto expected_schema_2 = std::make_shared<Schema>(
+      std::vector<SchemaField>{SchemaField::MakeRequired(1, "x", int64()),
+                               SchemaField::MakeRequired(2, "y", int64()),
+                               SchemaField::MakeRequired(3, "z", int64())},
+      /*schema_id=*/1);
+
+  auto expected_spec = std::make_shared<PartitionSpec>(
+      expected_schema_2, /*spec_id=*/0,
+      std::vector<PartitionField>{PartitionField(/*source_id=*/1, 
/*field_id=*/1000, "x",
+                                                 Transform::Identity())});
+
+  auto expected_sort_order = std::make_shared<SortOrder>(
+      /*order_id=*/3,
+      std::vector<SortField>{SortField(/*source_id=*/2, Transform::Identity(),
+                                       SortDirection::kAscending, 
NullOrder::kFirst),
+                             SortField(/*source_id=*/3, Transform::Bucket(4),
+                                       SortDirection::kDescending, 
NullOrder::kLast)});
+
+  auto expected_snapshot_1 = std::make_shared<Snapshot>(Snapshot{
+      .snapshot_id = 3051729675574597004,
+      .sequence_number = 0,
+      .timestamp_ms = TimePointMsFromUnixMs(1515100955770).value(),
+      .manifest_list = "s3://a/b/1.avro",
+      .summary = {{"operation", "append"}},
+  });
+
+  auto expected_snapshot_2 = std::make_shared<Snapshot>(Snapshot{
+      .snapshot_id = 3055729675574597004,
+      .parent_snapshot_id = 3051729675574597004,
+      .sequence_number = 1,
+      .timestamp_ms = TimePointMsFromUnixMs(1555100955770).value(),
+      .manifest_list = "s3://a/b/2.avro",
+      .summary = {{"operation", "append"}},
+      .schema_id = 1,
+  });
+
+  TableMetadata expected{
+      .format_version = 2,
+      .table_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+      .location = "s3://bucket/test/location",
+      .last_sequence_number = 34,
+      .last_updated_ms = TimePointMsFromUnixMs(1602638573590).value(),
+      .last_column_id = 3,
+      .schemas = {expected_schema_1, expected_schema_2},
+      .current_schema_id = 1,
+      .partition_specs = {expected_spec},
+      .default_spec_id = 0,
+      .last_partition_id = 1000,
+      .current_snapshot_id = 3055729675574597004,
+      .snapshots = {expected_snapshot_1, expected_snapshot_2},
+      .snapshot_log = {SnapshotLogEntry{
+                           .timestamp_ms = 
TimePointMsFromUnixMs(1515100955770).value(),
+                           .snapshot_id = 3051729675574597004},
+                       SnapshotLogEntry{
+                           .timestamp_ms = 
TimePointMsFromUnixMs(1555100955770).value(),
+                           .snapshot_id = 3055729675574597004}},
+      .sort_orders = {expected_sort_order},
+      .default_sort_order_id = 3,
+      .refs = {{"main", std::make_shared<SnapshotRef>(
+                            SnapshotRef{.snapshot_id = 3055729675574597004,
+                                        .retention = SnapshotRef::Branch{}})}},
+      .next_row_id = 0,
+  };
+
+  ASSERT_EQ(*metadata, expected);
+  AssertSchema(*metadata, *expected_schema_2);
+  AssertSchemaById(*metadata, 0, *expected_schema_1);
+  AssertSchemaById(*metadata, 1, *expected_schema_2);
+  AssertPartitionSpec(*metadata, *expected_spec);
+  AssertSortOrder(*metadata, *expected_sort_order);
+  AssertSnapshot(*metadata, *expected_snapshot_2);
+  AssertSnapshotById(*metadata, 3051729675574597004, *expected_snapshot_1);
+  AssertSnapshotById(*metadata, 3055729675574597004, *expected_snapshot_2);
+}
+
+TEST(MetadataSerdeTest, DeserializeV2ValidMinimal) {
+  std::unique_ptr<TableMetadata> metadata;
+  ASSERT_NO_FATAL_FAILURE(
+      ReadTableMetadata("TableMetadataV2ValidMinimal.json", &metadata));
+
+  auto expected_schema = std::make_shared<Schema>(
+      std::vector<SchemaField>{SchemaField::MakeRequired(1, "x", int64()),
+                               SchemaField::MakeRequired(2, "y", int64(), "comment"),
+                               SchemaField::MakeRequired(3, "z", int64())},
+      /*schema_id=*/0);
+
+  auto expected_spec = std::make_shared<PartitionSpec>(
+      expected_schema, /*spec_id=*/0,
+      std::vector<PartitionField>{PartitionField(/*source_id=*/1, 
/*field_id=*/1000, "x",
+                                                 Transform::Identity())});
+
+  auto expected_sort_order = std::make_shared<SortOrder>(
+      /*order_id=*/3, std::vector<SortField>{
+                          SortField(/*source_id=*/2, Transform::Identity(),
+                                    SortDirection::kAscending, 
NullOrder::kFirst),
+                          SortField(/*source_id=*/3, Transform::Bucket(4),
+                                    SortDirection::kDescending, 
NullOrder::kLast),
+                      });
+
+  TableMetadata expected{
+      .format_version = 2,
+      .table_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+      .location = "s3://bucket/test/location",
+      .last_sequence_number = 34,
+      .last_updated_ms = TimePointMsFromUnixMs(1602638573590).value(),
+      .last_column_id = 3,
+      .schemas = {expected_schema},
+      .current_schema_id = 0,
+      .partition_specs = {expected_spec},
+      .default_spec_id = 0,
+      .last_partition_id = 1000,
+      .current_snapshot_id = -1,
+      .sort_orders = {expected_sort_order},
+      .default_sort_order_id = 3,
+      .next_row_id = 0,
+  };
+
+  ASSERT_EQ(*metadata, expected);
+  AssertSchema(*metadata, *expected_schema);
+  AssertPartitionSpec(*metadata, *expected_spec);
+  AssertSortOrder(*metadata, *expected_sort_order);
+  ASSERT_FALSE(metadata->Snapshot().has_value());
+}
+
+TEST(MetadataSerdeTest, DeserializeStatisticsFiles) {
+  std::unique_ptr<TableMetadata> metadata;
+  ASSERT_NO_FATAL_FAILURE(
+      ReadTableMetadata("TableMetadataStatisticsFiles.json", &metadata));
+
+  auto expected_schema = std::make_shared<Schema>(
+      std::vector<SchemaField>{SchemaField(/*field_id=*/1, "x", 
iceberg::int64(),
+                                           /*optional=*/false)},
+      /*schema_id=*/0);
+
   auto expected_spec = std::make_shared<PartitionSpec>(expected_schema, 
/*spec_id=*/0,
-                                                       
std::move(partition_fields));
-  auto partition_spec = metadata->PartitionSpec();
-  ASSERT_TRUE(partition_spec.has_value());
-  EXPECT_EQ(*(partition_spec.value().get()), *expected_spec);
-
-  // Compare sort order
-  EXPECT_EQ(metadata->default_sort_order_id, 3);
-  std::vector<SortField> sort_fields;
-  sort_fields.emplace_back(/*source_id=*/2, Transform::Identity(),
-                           SortDirection::kAscending, NullOrder::kFirst);
-  sort_fields.emplace_back(/*source_id=*/3, Transform::Bucket(4),
-                           SortDirection::kDescending, NullOrder::kLast);
-  auto expected_sort_order =
-      std::make_shared<SortOrder>(/*order_id=*/3, std::move(sort_fields));
-  auto sort_order = metadata->SortOrder();
-  ASSERT_TRUE(sort_order.has_value());
-  EXPECT_EQ(*(sort_order.value().get()), *expected_sort_order);
+                                                       
std::vector<PartitionField>{});
 
-  // Compare snapshot
-  EXPECT_EQ(metadata->current_snapshot_id, 3055729675574597004);
-  auto snapshot = metadata->Snapshot();
-  ASSERT_TRUE(snapshot.has_value());
-  EXPECT_EQ(snapshot.value()->snapshot_id, 3055729675574597004);
-
-  // Compare snapshots
-  std::vector<Snapshot> expected_snapshots{
-      {
-          .snapshot_id = 3051729675574597004,
-          .sequence_number = 0,
-          .timestamp_ms = TimePointMsFromUnixMs(1515100955770).value(),
-          .manifest_list = "s3://a/b/1.avro",
-          .summary = {{"operation", "append"}},
-      },
-      {
+  auto expected_snapshot = std::make_shared<Snapshot>(Snapshot{
+      .snapshot_id = 3055729675574597004,
+      .sequence_number = 1,
+      .timestamp_ms = TimePointMsFromUnixMs(1555100955770).value(),
+      .manifest_list = "s3://a/b/2.avro",
+      .summary = {{"operation", "append"}},
+      .schema_id = 0,
+  });
+
+  auto expected_stats_file = std::make_shared<StatisticsFile>(StatisticsFile{
+      .snapshot_id = 3055729675574597004,
+      .path = "s3://a/b/stats.puffin",
+      .file_size_in_bytes = 413,
+      .file_footer_size_in_bytes = 42,
+      .blob_metadata =
+          {
+              BlobMetadata{
+                  .type = "ndv",
+                  .source_snapshot_id = 3055729675574597004,
+                  .source_snapshot_sequence_number = 1,
+                  .fields = {1},
+                  .properties = {},
+              },
+          },
+  });
+
+  TableMetadata expected{
+      .format_version = 2,
+      .table_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+      .location = "s3://bucket/test/location",
+      .last_sequence_number = 34,
+      .last_updated_ms = TimePointMsFromUnixMs(1602638573590).value(),
+      .last_column_id = 3,
+      .schemas = {expected_schema},
+      .current_schema_id = 0,
+      .partition_specs = {expected_spec},
+      .default_spec_id = 0,
+      .last_partition_id = 1000,
+      .properties = {},
+      .current_snapshot_id = 3055729675574597004,
+      .snapshots = {expected_snapshot},
+      .snapshot_log = {},
+      .metadata_log = {},
+      .sort_orders = {SortOrder::Unsorted()},
+      .default_sort_order_id = 0,
+      .refs = {{"main", std::make_shared<SnapshotRef>(
+                            SnapshotRef{.snapshot_id = 3055729675574597004,
+                                        .retention = SnapshotRef::Branch{}})}},
+      .statistics = {expected_stats_file},
+      .partition_statistics = {},
+      .next_row_id = 0,
+  };
+
+  ASSERT_EQ(*metadata, expected);
+}
+
+TEST(MetadataSerdeTest, DeserializePartitionStatisticsFiles) {
+  std::unique_ptr<TableMetadata> metadata;
+  ASSERT_NO_FATAL_FAILURE(
+      ReadTableMetadata("TableMetadataPartitionStatisticsFiles.json", &metadata));
+
+  TableMetadata expected{
+      .format_version = 2,
+      .table_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+      .location = "s3://bucket/test/location",
+      .last_sequence_number = 34,
+      .last_updated_ms = TimePointMsFromUnixMs(1602638573590).value(),
+      .last_column_id = 3,
+      .schemas = {std::make_shared<Schema>(
+          std::vector<SchemaField>{SchemaField(/*field_id=*/1, "x", 
iceberg::int64(),
+                                               /*optional=*/false)},
+          /*schema_id=*/0)},
+      .current_schema_id = 0,
+      .partition_specs = {PartitionSpec::Unpartitioned()},
+      .default_spec_id = 0,
+      .last_partition_id = 1000,
+      .properties = {},
+      .current_snapshot_id = 3055729675574597004,
+      .snapshots = {std::make_shared<Snapshot>(Snapshot{
           .snapshot_id = 3055729675574597004,
-          .parent_snapshot_id = 3051729675574597004,
           .sequence_number = 1,
           .timestamp_ms = TimePointMsFromUnixMs(1555100955770).value(),
           .manifest_list = "s3://a/b/2.avro",
           .summary = {{"operation", "append"}},
-          .schema_id = 1,
-      }};
-  EXPECT_EQ(metadata->snapshots.size(), expected_snapshots.size());
-  for (size_t i = 0; i < expected_snapshots.size(); ++i) {
-    EXPECT_EQ(*metadata->snapshots[i], expected_snapshots[i]);
-  }
-
-  // snapshot with ID 3051729675574597004
-  auto snapshot_v0 = metadata->SnapshotById(3051729675574597004);
-  ASSERT_TRUE(snapshot_v0.has_value());
-  EXPECT_EQ(*snapshot_v0.value(), expected_snapshots[0]);
-
-  // snapshot with ID 3055729675574597004
-  auto snapshot_v1 = metadata->SnapshotById(3055729675574597004);
-  ASSERT_TRUE(snapshot_v1.has_value());
-  EXPECT_EQ(*snapshot_v1.value(), expected_snapshots[1]);
-
-  // Compare snapshot logs
-  std::vector<SnapshotLogEntry> expected_snapshot_log{
-      {
-          .timestamp_ms = TimePointMsFromUnixMs(1515100955770).value(),
-          .snapshot_id = 3051729675574597004,
-      },
-      {
-          .timestamp_ms = TimePointMsFromUnixMs(1555100955770).value(),
-          .snapshot_id = 3055729675574597004,
-      }};
-  EXPECT_EQ(metadata->snapshot_log.size(), 2);
-  for (size_t i = 0; i < expected_snapshots.size(); ++i) {
-    EXPECT_EQ(metadata->snapshot_log[i], expected_snapshot_log[i]);
-  }
+          .schema_id = 0,
+      })},
+      .snapshot_log = {},
+      .metadata_log = {},
+      .sort_orders = {SortOrder::Unsorted()},
+      .default_sort_order_id = 0,
+      .refs = {{"main", std::make_shared<SnapshotRef>(
+                            SnapshotRef{.snapshot_id = 3055729675574597004,
+                                        .retention = SnapshotRef::Branch{}})}},
+      .statistics = {},
+      .partition_statistics = {std::make_shared<PartitionStatisticsFile>(
+          PartitionStatisticsFile{.snapshot_id = 3055729675574597004,
+                                  .path = "s3://a/b/partition-stats.parquet",
+                                  .file_size_in_bytes = 43})},
+      .next_row_id = 0,
+  };
+
+  ASSERT_EQ(*metadata, expected);
+}
+
+TEST(MetadataSerdeTest, DeserializeUnsupportedVersion) {
+  ReadTableMetadataExpectError("TableMetadataUnsupportedVersion.json",
+                               "Cannot read unsupported version");
+}
+
+TEST(MetadataSerdeTest, DeserializeV1MissingSchemaType) {
+  ReadTableMetadataExpectError("TableMetadataV1MissingSchemaType.json", "Missing 'type'");
+}
+
+TEST(MetadataSerdeTest, DeserializeV2CurrentSchemaNotFound) {
+  ReadTableMetadataExpectError("TableMetadataV2CurrentSchemaNotFound.json",
+                               "Cannot find schema with current-schema-id");
+}
+
+TEST(MetadataSerdeTest, DeserializeV2MissingLastPartitionId) {
+  ReadTableMetadataExpectError("TableMetadataV2MissingLastPartitionId.json",
+                               "last-partition-id must exist");
+}
+
+TEST(MetadataSerdeTest, DeserializeV2MissingPartitionSpecs) {
+  ReadTableMetadataExpectError("TableMetadataV2MissingPartitionSpecs.json",
+                               "partition-specs must exist");
+}
+
+TEST(MetadataSerdeTest, DeserializeV2MissingSchemas) {
+  ReadTableMetadataExpectError("TableMetadataV2MissingSchemas.json",
+                               "schemas must exist");
+}
+
+TEST(MetadataSerdeTest, DeserializeV2MissingSortOrder) {
+  ReadTableMetadataExpectError("TableMetadataV2MissingSortOrder.json",
+                               "sort-orders must exist");
 }
 
 }  // namespace iceberg
diff --git a/src/iceberg/test/test_common.cc b/src/iceberg/test/test_common.cc
index 25fa8f2..8c992c9 100644
--- a/src/iceberg/test/test_common.cc
+++ b/src/iceberg/test/test_common.cc
@@ -21,7 +21,6 @@
 
 #include <filesystem>
 #include <fstream>
-#include <optional>
 #include <sstream>
 
 #include <gtest/gtest.h>
@@ -48,14 +47,18 @@ void ReadJsonFile(const std::string& file_name, std::string* content) {
 
 void ReadTableMetadata(const std::string& file_name,
                        std::unique_ptr<TableMetadata>* metadata) {
+  auto result = ReadTableMetadata(file_name);
+  ASSERT_TRUE(result.has_value()) << "Failed to parse table metadata from " << file_name
+                                  << ": " << result.error().message;
+  *metadata = std::move(result.value());
+}
+
+Result<std::unique_ptr<TableMetadata>> ReadTableMetadata(const std::string& file_name) {
   std::string json_content;
   ReadJsonFile(file_name, &json_content);
 
   nlohmann::json json = nlohmann::json::parse(json_content);
-  auto result = TableMetadataFromJson(json);
-  ASSERT_TRUE(result.has_value()) << "Failed to parse table metadata from " << file_name
-                                  << ": " << result.error().message;
-  *metadata = std::move(result.value());
+  return TableMetadataFromJson(json);
 }
 
 }  // namespace iceberg
diff --git a/src/iceberg/test/test_common.h b/src/iceberg/test/test_common.h
index a9dba8c..e41a3f3 100644
--- a/src/iceberg/test/test_common.h
+++ b/src/iceberg/test/test_common.h
@@ -22,6 +22,7 @@
 #include <memory>
 #include <string>
 
+#include "iceberg/result.h"
 #include "iceberg/type_fwd.h"
 
 namespace iceberg {
@@ -36,4 +37,7 @@ void ReadJsonFile(const std::string& file_name, std::string* content);
 void ReadTableMetadata(const std::string& file_name,
                        std::unique_ptr<TableMetadata>* metadata);
 
+/// \brief Read table metadata from a JSON file and return the Result directly
+Result<std::unique_ptr<TableMetadata>> ReadTableMetadata(const std::string& file_name);
+
 }  // namespace iceberg

Reply via email to