This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new ba92781c fix: add identifier-field-ids JSON
serialization/deserialization (#451)
ba92781c is described below
commit ba92781cf8b701d8a9d0d15b06fda6101dc8d6c1
Author: Xinli Shang <[email protected]>
AuthorDate: Tue Dec 30 00:09:44 2025 -0800
fix: add identifier-field-ids JSON serialization/deserialization (#451)
Fixes the TODO in json_internal.cc for identifier-field-ids support.
---
src/iceberg/json_internal.cc | 22 ++++++++++++---
src/iceberg/test/metadata_serde_test.cc | 3 +-
src/iceberg/test/schema_json_test.cc | 49 +++++++++++++++++++++++++++++++++
3 files changed, 69 insertions(+), 5 deletions(-)
diff --git a/src/iceberg/json_internal.cc b/src/iceberg/json_internal.cc
index 4a36ebf2..e35e36cf 100644
--- a/src/iceberg/json_internal.cc
+++ b/src/iceberg/json_internal.cc
@@ -42,6 +42,7 @@
#include "iceberg/table_properties.h"
#include "iceberg/transform.h"
#include "iceberg/type.h"
+#include "iceberg/util/checked_cast.h"
#include "iceberg/util/formatter.h" // IWYU pragma: keep
#include "iceberg/util/json_util_internal.h"
#include "iceberg/util/macros.h"
@@ -309,7 +310,9 @@ nlohmann::json ToJson(const Type& type) {
nlohmann::json ToJson(const Schema& schema) {
nlohmann::json json = ToJson(static_cast<const Type&>(schema));
json[kSchemaId] = schema.schema_id();
- // TODO(gangwu): add identifier-field-ids.
+ if (!schema.IdentifierFieldIds().empty()) {
+ json[kIdentifierFieldIds] = schema.IdentifierFieldIds();
+ }
return json;
}
@@ -473,10 +476,21 @@ Result<std::unique_ptr<Schema>> SchemaFromJson(const
nlohmann::json& json) {
if (type->type_id() != TypeId::kStruct) [[unlikely]] {
return JsonParseError("Schema must be a struct type, but got {}",
SafeDumpJson(json));
}
+ auto& struct_type = internal::checked_cast<StructType&>(*type);
+
+ std::vector<SchemaField> fields;
+ fields.reserve(struct_type.fields().size());
+ for (auto& field : struct_type.fields()) {
+ fields.emplace_back(std::move(field));
+ }
+
+ ICEBERG_ASSIGN_OR_RAISE(
+ auto identifier_field_ids,
+ GetJsonValueOrDefault<std::vector<int32_t>>(json, kIdentifierFieldIds));
- auto& struct_type = static_cast<StructType&>(*type);
- auto schema_id = schema_id_opt.value_or(Schema::kInitialSchemaId);
- return FromStructType(std::move(struct_type), schema_id);
+ return std::make_unique<Schema>(std::move(fields),
+
schema_id_opt.value_or(Schema::kInitialSchemaId),
+ std::move(identifier_field_ids));
}
nlohmann::json ToJson(const PartitionField& partition_field) {
diff --git a/src/iceberg/test/metadata_serde_test.cc
b/src/iceberg/test/metadata_serde_test.cc
index 62682b80..6b8e37ad 100644
--- a/src/iceberg/test/metadata_serde_test.cc
+++ b/src/iceberg/test/metadata_serde_test.cc
@@ -143,7 +143,8 @@ TEST(MetadataSerdeTest, DeserializeV2Valid) {
std::vector<SchemaField>{SchemaField::MakeRequired(1, "x", int64()),
SchemaField::MakeRequired(2, "y", int64()),
SchemaField::MakeRequired(3, "z", int64())},
- /*schema_id=*/1);
+ /*schema_id=*/1,
+ /*identifier_field_ids=*/std::vector<int32_t>{1, 2});
auto expected_spec_result = PartitionSpec::Make(
/*spec_id=*/0,
diff --git a/src/iceberg/test/schema_json_test.cc
b/src/iceberg/test/schema_json_test.cc
index 1e934cce..da4c779a 100644
--- a/src/iceberg/test/schema_json_test.cc
+++ b/src/iceberg/test/schema_json_test.cc
@@ -27,6 +27,7 @@
#include "iceberg/json_internal.h"
#include "iceberg/schema.h"
#include "iceberg/schema_field.h"
+#include "iceberg/test/matchers.h"
#include "iceberg/type.h"
namespace iceberg {
@@ -133,4 +134,52 @@ TEST(SchemaJsonTest, RoundTrip) {
ASSERT_EQ(dumped_json, json);
}
+TEST(SchemaJsonTest, IdentifierFieldIds) {
+ // Test schema with identifier-field-ids
+ constexpr std::string_view json_with_identifier_str =
+ R"({"fields":[{"id":1,"name":"id","required":true,"type":"long"},
+ {"id":2,"name":"data","required":false,"type":"string"}],
+ "identifier-field-ids":[1],
+ "schema-id":1,
+ "type":"struct"})";
+
+ auto json_with_identifiers = nlohmann::json::parse(json_with_identifier_str);
+ ICEBERG_UNWRAP_OR_FAIL(auto schema_with_identifers,
+ SchemaFromJson(json_with_identifiers));
+ ASSERT_EQ(schema_with_identifers->fields().size(), 2);
+ ASSERT_EQ(schema_with_identifers->schema_id(), 1);
+ ASSERT_EQ(schema_with_identifers->IdentifierFieldIds().size(), 1);
+ ASSERT_EQ(schema_with_identifers->IdentifierFieldIds()[0], 1);
+ ASSERT_EQ(ToJson(*schema_with_identifers), json_with_identifiers);
+
+ // Test schema without identifier-field-ids
+ constexpr std::string_view json_without_identifiers_str =
+ R"({"fields":[{"id":1,"name":"id","required":true,"type":"int"},
+ {"id":2,"name":"name","required":false,"type":"string"}],
+ "schema-id":1,
+ "type":"struct"})";
+
+ auto json_without_identifiers =
nlohmann::json::parse(json_without_identifiers_str);
+ ICEBERG_UNWRAP_OR_FAIL(auto schema_without_identifiers,
+ SchemaFromJson(json_without_identifiers));
+ ASSERT_TRUE(schema_without_identifiers->IdentifierFieldIds().empty());
+ ASSERT_EQ(ToJson(*schema_without_identifiers), json_without_identifiers);
+
+ // Test schema with multiple identifier fields
+ constexpr std::string_view json_multi_identifiers_str =
+ R"({"fields":[{"id":1,"name":"user_id","required":true,"type":"long"},
+ {"id":2,"name":"org_id","required":true,"type":"long"},
+ {"id":3,"name":"data","required":false,"type":"string"}],
+ "identifier-field-ids":[1,2],
+ "schema-id":2,
+ "type":"struct"})";
+ auto json_multi_identifiers =
nlohmann::json::parse(json_multi_identifiers_str);
+ ICEBERG_UNWRAP_OR_FAIL(auto schema_multi_identifiers,
+ SchemaFromJson(json_multi_identifiers));
+ ASSERT_EQ(schema_multi_identifiers->IdentifierFieldIds().size(), 2);
+ ASSERT_EQ(schema_multi_identifiers->IdentifierFieldIds()[0], 1);
+ ASSERT_EQ(schema_multi_identifiers->IdentifierFieldIds()[1], 2);
+ ASSERT_EQ(ToJson(*schema_multi_identifiers), json_multi_identifiers);
+}
+
} // namespace iceberg