This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new ba92781c fix: add identifier-field-ids JSON 
serialization/deserialization (#451)
ba92781c is described below

commit ba92781cf8b701d8a9d0d15b06fda6101dc8d6c1
Author: Xinli Shang <[email protected]>
AuthorDate: Tue Dec 30 00:09:44 2025 -0800

    fix: add identifier-field-ids JSON serialization/deserialization (#451)
    
    Fixes the TODO in json_internal.cc for identifier-field-ids support.
---
 src/iceberg/json_internal.cc            | 22 ++++++++++++---
 src/iceberg/test/metadata_serde_test.cc |  3 +-
 src/iceberg/test/schema_json_test.cc    | 49 +++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+), 5 deletions(-)

diff --git a/src/iceberg/json_internal.cc b/src/iceberg/json_internal.cc
index 4a36ebf2..e35e36cf 100644
--- a/src/iceberg/json_internal.cc
+++ b/src/iceberg/json_internal.cc
@@ -42,6 +42,7 @@
 #include "iceberg/table_properties.h"
 #include "iceberg/transform.h"
 #include "iceberg/type.h"
+#include "iceberg/util/checked_cast.h"
 #include "iceberg/util/formatter.h"  // IWYU pragma: keep
 #include "iceberg/util/json_util_internal.h"
 #include "iceberg/util/macros.h"
@@ -309,7 +310,9 @@ nlohmann::json ToJson(const Type& type) {
 nlohmann::json ToJson(const Schema& schema) {
   nlohmann::json json = ToJson(static_cast<const Type&>(schema));
   json[kSchemaId] = schema.schema_id();
-  // TODO(gangwu): add identifier-field-ids.
+  if (!schema.IdentifierFieldIds().empty()) {
+    json[kIdentifierFieldIds] = schema.IdentifierFieldIds();
+  }
   return json;
 }
 
@@ -473,10 +476,21 @@ Result<std::unique_ptr<Schema>> SchemaFromJson(const 
nlohmann::json& json) {
   if (type->type_id() != TypeId::kStruct) [[unlikely]] {
     return JsonParseError("Schema must be a struct type, but got {}", 
SafeDumpJson(json));
   }
+  auto& struct_type = internal::checked_cast<StructType&>(*type);
+
+  std::vector<SchemaField> fields;
+  fields.reserve(struct_type.fields().size());
+  for (auto& field : struct_type.fields()) {
+    fields.emplace_back(std::move(field));
+  }
+
+  ICEBERG_ASSIGN_OR_RAISE(
+      auto identifier_field_ids,
+      GetJsonValueOrDefault<std::vector<int32_t>>(json, kIdentifierFieldIds));
 
-  auto& struct_type = static_cast<StructType&>(*type);
-  auto schema_id = schema_id_opt.value_or(Schema::kInitialSchemaId);
-  return FromStructType(std::move(struct_type), schema_id);
+  return std::make_unique<Schema>(std::move(fields),
+                                  
schema_id_opt.value_or(Schema::kInitialSchemaId),
+                                  std::move(identifier_field_ids));
 }
 
 nlohmann::json ToJson(const PartitionField& partition_field) {
diff --git a/src/iceberg/test/metadata_serde_test.cc 
b/src/iceberg/test/metadata_serde_test.cc
index 62682b80..6b8e37ad 100644
--- a/src/iceberg/test/metadata_serde_test.cc
+++ b/src/iceberg/test/metadata_serde_test.cc
@@ -143,7 +143,8 @@ TEST(MetadataSerdeTest, DeserializeV2Valid) {
       std::vector<SchemaField>{SchemaField::MakeRequired(1, "x", int64()),
                                SchemaField::MakeRequired(2, "y", int64()),
                                SchemaField::MakeRequired(3, "z", int64())},
-      /*schema_id=*/1);
+      /*schema_id=*/1,
+      /*identifier_field_ids=*/std::vector<int32_t>{1, 2});
 
   auto expected_spec_result = PartitionSpec::Make(
       /*spec_id=*/0,
diff --git a/src/iceberg/test/schema_json_test.cc 
b/src/iceberg/test/schema_json_test.cc
index 1e934cce..da4c779a 100644
--- a/src/iceberg/test/schema_json_test.cc
+++ b/src/iceberg/test/schema_json_test.cc
@@ -27,6 +27,7 @@
 #include "iceberg/json_internal.h"
 #include "iceberg/schema.h"
 #include "iceberg/schema_field.h"
+#include "iceberg/test/matchers.h"
 #include "iceberg/type.h"
 
 namespace iceberg {
@@ -133,4 +134,52 @@ TEST(SchemaJsonTest, RoundTrip) {
   ASSERT_EQ(dumped_json, json);
 }
 
+TEST(SchemaJsonTest, IdentifierFieldIds) {
+  // Test schema with identifier-field-ids
+  constexpr std::string_view json_with_identifier_str =
+      R"({"fields":[{"id":1,"name":"id","required":true,"type":"long"},
+                    {"id":2,"name":"data","required":false,"type":"string"}],
+          "identifier-field-ids":[1],
+          "schema-id":1,
+          "type":"struct"})";
+
+  auto json_with_identifiers = nlohmann::json::parse(json_with_identifier_str);
+  ICEBERG_UNWRAP_OR_FAIL(auto schema_with_identifers,
+                         SchemaFromJson(json_with_identifiers));
+  ASSERT_EQ(schema_with_identifers->fields().size(), 2);
+  ASSERT_EQ(schema_with_identifers->schema_id(), 1);
+  ASSERT_EQ(schema_with_identifers->IdentifierFieldIds().size(), 1);
+  ASSERT_EQ(schema_with_identifers->IdentifierFieldIds()[0], 1);
+  ASSERT_EQ(ToJson(*schema_with_identifers), json_with_identifiers);
+
+  // Test schema without identifier-field-ids
+  constexpr std::string_view json_without_identifiers_str =
+      R"({"fields":[{"id":1,"name":"id","required":true,"type":"int"},
+                    {"id":2,"name":"name","required":false,"type":"string"}],
+          "schema-id":1,
+          "type":"struct"})";
+
+  auto json_without_identifiers = 
nlohmann::json::parse(json_without_identifiers_str);
+  ICEBERG_UNWRAP_OR_FAIL(auto schema_without_identifiers,
+                         SchemaFromJson(json_without_identifiers));
+  ASSERT_TRUE(schema_without_identifiers->IdentifierFieldIds().empty());
+  ASSERT_EQ(ToJson(*schema_without_identifiers), json_without_identifiers);
+
+  // Test schema with multiple identifier fields
+  constexpr std::string_view json_multi_identifiers_str =
+      R"({"fields":[{"id":1,"name":"user_id","required":true,"type":"long"},
+                    {"id":2,"name":"org_id","required":true,"type":"long"},
+                    {"id":3,"name":"data","required":false,"type":"string"}],
+          "identifier-field-ids":[1,2],
+          "schema-id":2,
+          "type":"struct"})";
+  auto json_multi_identifiers = 
nlohmann::json::parse(json_multi_identifiers_str);
+  ICEBERG_UNWRAP_OR_FAIL(auto schema_multi_identifiers,
+                         SchemaFromJson(json_multi_identifiers));
+  ASSERT_EQ(schema_multi_identifiers->IdentifierFieldIds().size(), 2);
+  ASSERT_EQ(schema_multi_identifiers->IdentifierFieldIds()[0], 1);
+  ASSERT_EQ(schema_multi_identifiers->IdentifierFieldIds()[1], 2);
+  ASSERT_EQ(ToJson(*schema_multi_identifiers), json_multi_identifiers);
+}
+
 }  // namespace iceberg

Reply via email to